22"""Filesystem related util functions.
33"""
44from __future__ import annotations
5- from typing import Union , Iterable , Callable
5+ from typing import Optional , Union , Iterable , Callable
66import os
77import sys
88import re
2020HOME = Path .home ()
2121
2222
23- def copy_if_exists (src : str , dst : str = HOME ) -> bool :
23+ def copy_if_exists (src : str , dst : Union [ str , Path ] = HOME ) -> bool :
2424 """Copy a file.
2525 No exception is thrown if the source file does not exist.
2626
@@ -37,7 +37,9 @@ def copy_if_exists(src: str, dst: str = HOME) -> bool:
3737 return False
3838
3939
40- def link_if_exists (src : str , dst : str = HOME , target_is_directory : bool = True ) -> bool :
40+ def link_if_exists (
41+ src : str , dst : Union [str , Path ] = HOME , target_is_directory : bool = True
42+ ) -> bool :
4143 """Make a symbolic link of a file.
4244 No exception is thrown if the source file does not exist.
4345
@@ -217,7 +219,7 @@ def find_data_tables(
217219def _find_data_tables_file (file , filter_ , patterns ) -> set [str ]:
218220 if isinstance (file , str ):
219221 file = Path (file )
220- text = file .read_text ().lower ()
222+ text = file .read_text (encoding = "utf-8" ).lower ()
221223 patterns = {
222224 r"from\s+(\w+)\W*\s*" ,
223225 r"from\s+(\w+\.\w+)\W*\s*" ,
@@ -313,7 +315,7 @@ def find_ess_empty(path: Union[str, Path], ignore: Callable = _ignore) -> list[P
313315
314316
315317def _find_ess_empty (
316- path : Path , ignore : Callable , ess_empty : dict [Path , bool ], ess_empty_dir : list [str ]
318+ path : Path , ignore : Callable , ess_empty : dict [Path , bool ], ess_empty_dir : list [Path ]
317319):
318320 if is_ess_empty (path = path , ignore = ignore , ess_empty = ess_empty ):
319321 ess_empty_dir .append (path )
@@ -326,7 +328,9 @@ def _find_ess_empty(
326328
327329
328330def is_ess_empty (
329- path : Path , ignore : Callable = _ignore , ess_empty : dict [Path , bool ] = None
331+ path : Path ,
332+ ignore : Callable = _ignore ,
333+ ess_empty : Optional [dict [Path , bool ]] = None
330334):
331335 """Check if a directory is essentially empty.
332336
@@ -365,9 +369,9 @@ def is_ess_empty(
365369
366370def update_file (
367371 path : Path ,
368- regex : list [tuple [str , str ]] = None ,
369- exact : list [tuple [str , str ]] = None ,
370- append : Union [str , Iterable [str ]] = None ,
372+ regex : Optional [ list [tuple [str , str ] ]] = None ,
373+ exact : Optional [ list [tuple [str , str ] ]] = None ,
374+ append : Union [None , str , Iterable [str ]] = None ,
371375 exist_skip : bool = True ,
372376) -> None :
373377 """Update a text file using regular expression substitution.
@@ -382,7 +386,7 @@ def update_file(
382386 """
383387 if isinstance (path , str ):
384388 path = Path (path )
385- text = path .read_text ()
389+ text = path .read_text (encoding = "utf-8" )
386390 if regex :
387391 for pattern , replace in regex :
388392 text = re .sub (pattern , replace , text )
@@ -394,7 +398,7 @@ def update_file(
394398 append = "\n " .join (append )
395399 if not exist_skip or append not in text :
396400 text += append
397- path .write_text (text )
401+ path .write_text (text , encoding = "utf-8" )
398402
399403
400404def get_files (dir_ : Union [str , Path ], exts : Union [str , list [str ]]) -> Iterable [Path ]:
@@ -581,7 +585,9 @@ def prune_json(input: Union[str, Path], output: Union[str, Path] = ""):
581585 else :
582586 output = input .with_name (input .stem + "_prune.json" )
583587 skip = False
584- with input .open ("r" ) as fin , output .open ("w" ) as fout :
588+ with input .open ("r" , encoding = "utf-8" ) as fin , output .open (
589+ "w" , encoding = "utf-8"
590+ ) as fout :
585591 for line in fin :
586592 line = line .strip ()
587593 if line == '"value_counts": {' :
@@ -601,7 +607,7 @@ def _filter_num(path: Union[str, Path], pattern: str, num_lines: int):
601607 results = []
602608 res = []
603609 count = 0
604- for line in path .open ():
610+ for line in path .open (encoding = "utf-8" ):
605611 if count > 0 :
606612 res .append (line )
607613 count -= 1
@@ -623,7 +629,7 @@ def _filter_sp(path: Union[str, Path], pattern: str, sub_pattern: str):
623629 results = []
624630 res = []
625631 sub = False
626- for line in path .open ():
632+ for line in path .open (encoding = "utf-8" ):
627633 if sub :
628634 if re .search (sub_pattern , line ):
629635 res .append (line )
0 commit comments