Skip to content

Commit da1b9ff

Browse files
Copilot and daohu527 committed
Implement autocopy_tool: CLI data copy tool with rsync and local transfer support
Co-authored-by: daohu527 <10419854+daohu527@users.noreply.github.com>
1 parent d9bd9ed commit da1b9ff

File tree

16 files changed

+596
-0
lines changed

16 files changed

+596
-0
lines changed

autocopy_tool/__init__.py

Whitespace-only changes.

autocopy_tool/config.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
source:
2+
type: network
3+
base_path: /mnt/autodrive_data
4+
host: 10.10.10.5
5+
username: tester
6+
# password: secret # Not recommended; prefer SSH key authentication
7+
# ssh_key: ~/.ssh/id_rsa
8+
protocol: rsync
9+
10+
targets:
11+
- /data/test_today/
12+
13+
rules:
14+
log:
15+
path: log/
16+
filter: "{module}/{date}"
17+
bag:
18+
path: bag/
19+
filter: "{date}"
20+
map:
21+
path: map/
22+
filter: "{name}"
23+
conf:
24+
path: conf/
25+
filter: "{name}"

autocopy_tool/main.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#!/usr/bin/env python3
2+
"""Autonomous Data Copy Tool – CLI entry point.
3+
4+
Examples::
5+
6+
python main.py --type log --date 2025-11-04 --module perception
7+
python main.py --type bag --date 2025-11-04
8+
python main.py --type map --name shanghai_ring
9+
python main.py --type conf --name default --local
10+
"""
11+
import argparse
12+
import os
13+
import subprocess
14+
import sys
15+
from pathlib import Path
16+
17+
import yaml
18+
19+
from autocopy_tool.modules.filters import build_source_path
20+
from autocopy_tool.modules.local_transfer import local_copy
21+
from autocopy_tool.modules.rsync_transfer import rsync_copy
22+
from autocopy_tool.utils.logger import get_logger
23+
from autocopy_tool.utils.time_utils import today, validate_date
24+
25+
logger = get_logger(__name__)
26+
27+
_DEFAULT_CONFIG = Path(__file__).parent / "config.yml"
28+
29+
30+
def load_config(path: str = str(_DEFAULT_CONFIG)) -> dict:
    """Load and return the YAML configuration file.

    Args:
        path: Path to the YAML configuration file.

    Returns:
        Parsed configuration as a dictionary. An empty file (which
        ``yaml.safe_load`` parses to ``None``) yields an empty dictionary so
        that callers can always treat the result as a mapping.

    Raises:
        OSError: If the file cannot be opened (``FileNotFoundError``,
            ``PermissionError``, ...).
        yaml.YAMLError: If the file is not valid YAML, or if its top-level
            node is not a mapping.
    """
    with open(path, encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # safe_load returns None for an empty document; honour the ``-> dict``
    # contract instead of leaking None to callers that index the result.
    if data is None:
        return {}

    # A list or scalar at the top level is syntactically valid YAML but not a
    # usable configuration. Report it with the exception type callers already
    # handle for unreadable configuration content.
    if not isinstance(data, dict):
        raise yaml.YAMLError(
            f"Top-level YAML node in {path} must be a mapping, "
            f"got {type(data).__name__}"
        )
    return data
41+
42+
43+
def parse_args(argv=None) -> argparse.Namespace:
    """Build the command-line parser and parse *argv* with it.

    Args:
        argv: Argument list to parse; passed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        Namespace with the attributes ``type``, ``date``, ``module``,
        ``name``, ``target``, ``config`` and ``local``.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Autonomous Data Copy Tool",
    )

    # The data type is the only mandatory option.
    cli.add_argument(
        "--type",
        help="Data type to copy.",
        choices=["log", "bag", "map", "conf"],
        required=True,
    )

    # Plain optional value flags that all default to None.
    value_flags = (
        (
            "--date",
            "Date filter in YYYY-MM-DD format (default: today). Used for log/bag types.",
        ),
        ("--module", "Module name for log copies (e.g. perception)."),
        ("--name", "Name identifier for map/conf copies."),
        (
            "--target",
            "Override destination directory (default: first target in config.yml).",
        ),
    )
    for flag, description in value_flags:
        cli.add_argument(flag, default=None, help=description)

    cli.add_argument(
        "--config",
        help="Path to configuration YAML file.",
        default=str(_DEFAULT_CONFIG),
    )
    cli.add_argument(
        "--local",
        help="Use local filesystem copy instead of rsync (source must be locally mounted).",
        action="store_true",
    )

    return cli.parse_args(argv)
87+
88+
89+
def main(argv=None) -> int:
    """Main entry point.

    Parses the command line, loads the configuration, resolves the source
    path and destination, and runs either a local copy or an rsync transfer.
    Every expected failure is logged and reported through the exit code
    rather than an unhandled traceback.

    Args:
        argv: Optional argument list forwarded to :func:`parse_args`.

    Returns:
        Exit code (0 on success, non-zero on failure).
    """
    args = parse_args(argv)

    try:
        cfg = load_config(args.config)
    except FileNotFoundError:
        logger.error("Configuration file not found: %s", args.config)
        return 1
    except OSError as exc:
        # e.g. PermissionError or IsADirectoryError while opening the file.
        logger.error("Cannot read configuration file %s: %s", args.config, exc)
        return 1
    except yaml.YAMLError as exc:
        logger.error("Failed to parse configuration file: %s", exc)
        return 1

    # An empty or non-mapping YAML document cannot be used as configuration.
    if not isinstance(cfg, dict):
        logger.error("Configuration file %s is empty or not a mapping.", args.config)
        return 1

    # Resolve date (default to today for date-based types)
    if args.date is None:
        date = today()
    else:
        try:
            date = validate_date(args.date)
        except ValueError as exc:
            logger.error("%s", exc)
            return 1

    # Build source path. A missing section or rule surfaces as KeyError, and
    # a section that is present but empty (None) as TypeError.
    try:
        src = build_source_path(
            cfg,
            args.type,
            date=date,
            module=args.module or "",
            name=args.name or "",
        )
    except (KeyError, TypeError) as exc:
        logger.error(
            "Invalid configuration for type '%s': missing or malformed entry %s",
            args.type,
            exc,
        )
        return 1

    # Determine destination
    dst = args.target
    if dst is None:
        targets = cfg.get("targets") or []
        # Accept a single scalar target as well as a list; indexing a string
        # would otherwise pick its first character.
        if isinstance(targets, str):
            targets = [targets]
        if not targets:
            logger.error("No targets defined in configuration.")
            return 1
        dst = targets[0]

    # Resolve the remote connection settings up front so that a missing key
    # is reported as a configuration problem, not as a transfer failure.
    rsync_kwargs = {}
    if not args.local:
        try:
            source_cfg = cfg["source"]
            rsync_kwargs = {
                "host": source_cfg["host"],
                "user": source_cfg["username"],
                "ssh_key": source_cfg.get("ssh_key"),
            }
        except (KeyError, TypeError) as exc:
            logger.error(
                "Invalid 'source' configuration for rsync transfer: missing entry %s",
                exc,
            )
            return 1

    try:
        if args.local:
            local_copy(src, dst)
        else:
            rsync_copy(src=src, dst=dst, **rsync_kwargs)
    except (OSError, subprocess.CalledProcessError) as exc:
        # OSError already covers FileNotFoundError and PermissionError.
        logger.error("Transfer failed: %s", exc)
        return 1

    logger.info("✅ Copy completed: %s -> %s", src, dst)
    return 0
154+
155+
156+
if __name__ == "__main__":
157+
sys.exit(main())

autocopy_tool/modules/__init__.py

Whitespace-only changes.

autocopy_tool/modules/filters.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""Path filter helpers for autocopy_tool.
2+
3+
Builds source sub-paths from rule patterns defined in ``config.yml``.
4+
"""
5+
import os
6+
from typing import Any
7+
8+
9+
def build_source_path(cfg: dict, data_type: str, **kwargs: Any) -> str:
    """Assemble the source path for *data_type* from its configured rule.

    The rule's ``filter`` entry is a format string. Its placeholders are
    filled from *kwargs*, and any placeholder without a matching keyword is
    rendered as an empty string instead of raising.

    Args:
        cfg: Parsed configuration dictionary; ``cfg["rules"][data_type]``
            and ``cfg["source"]["base_path"]`` are read.
        data_type: Key of the rule to use (e.g. ``log``, ``bag``, ``map``,
            ``conf``).
        **kwargs: Placeholder values (``date``, ``module``, ``name``, ...).

    Returns:
        ``base_path``, the rule's ``path`` and the rendered filter joined
        with :func:`os.path.join`.

    Raises:
        KeyError: If *data_type* is not present in ``cfg["rules"]``.
    """

    class _BlankWhenAbsent(dict):
        """Mapping that resolves unknown placeholders to an empty string."""

        def __missing__(self, placeholder):
            return ""

    selected = cfg["rules"][data_type]
    rendered = selected["filter"].format_map(_BlankWhenAbsent(kwargs))

    # An empty leading placeholder leaves a leading "/", which os.path.join
    # would interpret as an absolute path and discard the preceding parts.
    relative = rendered.lstrip("/")

    root = cfg["source"]["base_path"]
    return os.path.join(root, selected["path"], relative)
autocopy_tool/modules/local_transfer.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Local file copy transfer module for autocopy_tool.
2+
3+
Used when the source is already accessible on the local filesystem
4+
(e.g. a mounted NAS share or a USB drive).
5+
"""
6+
import os
7+
import shutil
8+
from pathlib import Path
9+
10+
from autocopy_tool.utils.logger import get_logger
11+
12+
logger = get_logger(__name__)
13+
14+
15+
def local_copy(src: str, dst: str) -> None:
    """Copy a local file or directory tree into the directory *dst*.

    *dst* is created when it does not exist. A directory source is copied
    recursively to ``dst/<source directory name>``, merging into an existing
    directory of that name. A file source is copied into *dst*.

    Args:
        src: Source file or directory path.
        dst: Destination directory path.

    Raises:
        FileNotFoundError: If *src* does not exist.
    """
    origin = Path(src)
    if not origin.exists():
        raise FileNotFoundError(f"Source path does not exist: {src}")

    os.makedirs(dst, exist_ok=True)

    # Single file: place it inside the destination directory and finish.
    if not origin.is_dir():
        shutil.copy2(src, dst)
        logger.info("File copied: %s -> %s", src, dst)
        return

    # Directory: keep its own name underneath the destination.
    landing = Path(dst) / origin.name
    shutil.copytree(src, landing, dirs_exist_ok=True)
    logger.info("Directory copied: %s -> %s", src, landing)
autocopy_tool/modules/rsync_transfer.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""rsync-based file transfer module for autocopy_tool."""
2+
import os
3+
import shlex
4+
import subprocess
5+
from typing import Optional
6+
7+
from autocopy_tool.utils.logger import get_logger
8+
9+
logger = get_logger(__name__)
10+
11+
12+
def rsync_copy(
    src: str,
    dst: str,
    host: str,
    user: str,
    ssh_key: Optional[str] = None,
    extra_args: Optional[list] = None,
) -> None:
    """Transfer files from a remote host using rsync over SSH.

    The local destination directory is created if necessary, then
    ``rsync -avz --progress`` is run with ``ssh`` as the remote shell.

    Args:
        src: Source path on the remote host.
        dst: Destination directory on the local host.
        host: Remote hostname or IP address.
        user: Remote username.
        ssh_key: Optional path to an SSH private key file. A leading ``~``
            is expanded to the current user's home directory.
        extra_args: Optional list of additional rsync arguments. They are
            placed with the other options, ahead of the source and
            destination operands.

    Raises:
        subprocess.CalledProcessError: If rsync exits with a non-zero status.
        OSError: If *dst* cannot be created or the rsync executable cannot
            be started.
    """
    os.makedirs(dst, exist_ok=True)

    ssh_parts = ["ssh"]
    if ssh_key:
        # No local shell is involved, so expand "~" here. The remote-shell
        # command is handed to rsync as one string, hence the key path is
        # quoted to keep a path containing spaces as a single word.
        ssh_parts += ["-i", shlex.quote(os.path.expanduser(ssh_key))]
    ssh_cmd = " ".join(ssh_parts)

    # The command is an argv list executed without a local shell, so special
    # characters in individual arguments are not subject to local shell word
    # splitting. Options (including caller-supplied ones) come first and the
    # SRC/DEST operands last, matching "rsync [OPTION...] SRC... [DEST]".
    cmd = ["rsync", "-avz", "--progress", "-e", ssh_cmd]
    if extra_args:
        cmd.extend(extra_args)
    cmd += [f"{user}@{host}:{src}", dst]

    # shlex.join quotes each argument so the logged line is unambiguous.
    logger.info("Running: %s", shlex.join(cmd))
    subprocess.run(cmd, check=True)
    logger.info("rsync transfer completed: %s@%s:%s -> %s", user, host, src, dst)

autocopy_tool/utils/__init__.py

Whitespace-only changes.

autocopy_tool/utils/logger.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Structured logging utility for autocopy_tool."""
2+
import logging
3+
import sys
4+
from pathlib import Path
5+
6+
7+
def get_logger(name: str, log_file: str = None, level: int = logging.INFO) -> logging.Logger:
    """Fetch the logger called *name*, configuring it on first use.

    A logger that already has handlers is returned untouched, so repeated
    calls neither add duplicate handlers nor change the level.

    Args:
        name: Logger name (typically ``__name__`` of the calling module).
        log_file: Optional path of a file that should also receive the
            output; its parent directory is created if missing.
        level: Logging level applied on first configuration (default: INFO).

    Returns:
        The :class:`logging.Logger` registered under *name*.
    """
    log = logging.getLogger(name)

    if not log.handlers:
        log.setLevel(level)
        layout = logging.Formatter(
            datefmt="%Y-%m-%d %H:%M:%S",
            fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        )

        def _attach(handler: logging.Handler) -> None:
            # Every handler shares the same line layout.
            handler.setFormatter(layout)
            log.addHandler(handler)

        # Console output goes to stdout.
        _attach(logging.StreamHandler(sys.stdout))

        # File output is added only when a path was given.
        if log_file:
            Path(log_file).parent.mkdir(parents=True, exist_ok=True)
            _attach(logging.FileHandler(log_file, encoding="utf-8"))

    return log

autocopy_tool/utils/time_utils.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""Date and time utilities for autocopy_tool."""
2+
import datetime
3+
4+
5+
def today() -> str:
    """Give the current local date formatted as ``YYYY-MM-DD``."""
    current = datetime.date.today()
    return current.isoformat()
8+
9+
10+
def validate_date(date_str: str) -> str:
    """Check that *date_str* is a valid date and return it as ``YYYY-MM-DD``.

    Parsing is delegated to :meth:`datetime.date.fromisoformat`, and the
    result is re-serialised with ``isoformat()``.

    Args:
        date_str: Date string to validate (e.g. ``"2025-11-04"``).

    Returns:
        The date in ``YYYY-MM-DD`` form.

    Raises:
        ValueError: If *date_str* cannot be parsed as a valid date.
    """
    try:
        checked = datetime.date.fromisoformat(date_str)
    except ValueError as exc:
        problem = f"Invalid date '{date_str}'. Expected format: YYYY-MM-DD"
        raise ValueError(problem) from exc
    else:
        return checked.isoformat()

0 commit comments

Comments
 (0)