Skip to content

Commit a77f06b

Browse files
committed
Store run-record path specs POSIX and report platform paths
There is limited support for reading and acting on old run-records that have paths stored in platform conventions. Detection works when the path matches an existing item on the file system. Relative paths are always stored in POSIX notation.
1 parent 7d55562 commit a77f06b

File tree

1 file changed

+110
-1
lines changed

1 file changed

+110
-1
lines changed

datalad_next/patches/run.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
"""Enhance ``run()`` placeholder substitutions to honor configuration defaults
1+
"""Enhance datalad-core's ``run()``
2+
3+
Portable path handling logic for run-records
4+
--------------------------------------------
5+
6+
Placeholder substitutions to honor configuration defaults
7+
---------------------------------------------------------
28
39
Previously, ``run()`` would not recognize configuration defaults for
410
placeholder substitution. This means that any placeholders globally declared in
@@ -19,21 +25,119 @@
1925
"""
2026

2127
from itertools import filterfalse
28+
from os.path import lexists
29+
from pathlib import (
30+
PurePath,
31+
PureWindowsPath,
32+
PurePosixPath,
33+
)
2234
import sys
2335

2436
from datalad.core.local.run import (
2537
GlobbedPaths,
2638
SequenceFormatter,
2739
normalize_command,
2840
quote_cmdlinearg,
41+
_create_record as _orig_create_record,
2942
)
43+
from datalad.distribution.dataset import Dataset
44+
from datalad.local.rerun import get_run_info as _orig_get_run_info
3045
from datalad.interface.common_cfg import definitions as cfg_defs
3146
from datalad.support.constraints import EnsureStr
3247
from datalad.support.extensions import register_config
3348

3449
from . import apply_patch
3550

3651

52+
# Deals with https://github.com/datalad/datalad/issues/7512
53+
def _create_record(run_info, sidecar_flag, ds):
    """Create a run-record with input/output specs stored as POSIX paths

    Any ``inputs`` and ``outputs`` entries present in ``run_info`` are
    replaced (in place) by the results of passing each item through
    ``_get_posix_relpath_for_runrecord()``. Afterwards the original
    ``_create_record()`` implementation is called with the updated
    ``run_info`` and its return value is passed on unchanged.
    """
    for spec_key in ('inputs', 'outputs'):
        if spec_key in run_info:
            # store every path specification in POSIX notation
            run_info[spec_key] = list(
                map(_get_posix_relpath_for_runrecord, run_info[spec_key])
            )

    return _orig_create_record(run_info, sidecar_flag, ds)
62+
63+
64+
def _get_posix_relpath_for_runrecord(path):
    """Return a relative ``path`` in POSIX notation

    An absolute path is handed back exactly as it was given. Any other
    path is parsed using the conventions of the current platform and
    returned as a ``str`` in POSIX notation.
    """
    platform_path = PurePath(path)
    # converting an absolute path to the convention of a different
    # platform serves no purpose, hence such a path is returned untouched
    return path if platform_path.is_absolute() \
        else str(PurePosixPath(platform_path))
73+
74+
75+
# Deals with https://github.com/datalad/datalad/issues/7512
76+
def get_run_info(dset, message):
    """Report run-record input/output specs in platform conventions

    Calls the original ``get_run_info()`` and returns its
    ``(msg, run_info)`` pair. When ``run_info`` is not ``None``, every
    item of its ``inputs`` and ``outputs`` entries (if present) is
    replaced by the result of ``_get_platform_path_from_runrecord()``
    for ``dset``.
    """
    msg, run_info = _orig_get_run_info(dset, message)

    # without a run-record there is nothing to process
    if run_info is not None:
        for spec_key in ('inputs', 'outputs'):
            if spec_key in run_info:
                run_info[spec_key] = [
                    _get_platform_path_from_runrecord(spec, dset)
                    for spec in run_info[spec_key]
                ]

    return msg, run_info
88+
89+
90+
def _get_platform_path_from_runrecord(path: str, ds: "Dataset") -> str:
    """Helper to standardize run_info path handling

    Previously, run-records would contain platform paths (e.g., Windows paths
    when added on Windows, POSIX paths elsewhere). This made cross-platform
    rerun impossible out of the box, but it also means that such datasets are
    out there in unknown numbers.

    This helper inspects any input/output path reported by get_run_info()
    and tries to ensure that it matches platform conventions.

    Parameters
    ----------
    path: str
      A str-path from an input/output specification
    ds: Dataset
      This dataset's base path is used for existence testing for
      convention determination.

    Returns
    -------
    str
      The path in the conventions of the current platform. If the
      convention detection raises an exception, ``path`` is returned
      unchanged.
    """
    # we only need to act differently, when an incoming path is
    # windows. This is not possible to say with 100% confidence,
    # because a POSIX path can also contain a backslash. We support
    # a few standard cases where we CAN tell
    try:
        # Without a windows pathsep there is no problem. With one, we
        # assume windows for a moment: if there is something on the
        # filesystem for this path, we can be reasonably sure that this
        # is indeed a windows path. This won't catch everything, but is
        # better than nothing.
        is_windows = '\\' in path and lexists(
            str(ds.pathobj / PureWindowsPath(path)))
        # if it is not detectably windows, we have no means to verify
        # further hypotheses -- go with the POSIX assumption and hope
        # for the best
        pathobj = PureWindowsPath(path) if is_windows \
            else PurePosixPath(path)
    except Exception:
        # deliberate best-effort: whenever the detection itself fails,
        # report the path exactly as it was recorded
        return path

    # we report in platform-conventions
    return str(PurePath(pathobj))
139+
140+
37141
# This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37
38142
# datalad/core/local/run.py
39143
# The change has been proposed in https://github.com/datalad/datalad/pull/7509
@@ -80,6 +184,11 @@ def not_subst(x):
80184

81185
# Register the replacement implementations defined in this module with
# `apply_patch()`. Each call names a target module and the attribute in it
# that the given function is meant to stand in for.
# NOTE(review): the exact semantics of `apply_patch()` (and of its `None`
# second argument) are defined in the package's `__init__` -- confirm there.
apply_patch(
    'datalad.core.local.run', None, 'format_command', format_command)
# POSIX path storage when a run-record is created
# (https://github.com/datalad/datalad/issues/7512)
apply_patch(
    'datalad.core.local.run', None, '_create_record', _create_record)
# platform path reporting when a run-record is read
# (https://github.com/datalad/datalad/issues/7512)
apply_patch(
    'datalad.local.rerun', None, 'get_run_info', get_run_info)
191+
83192
register_config(
84193
'datalad.run.substitutions.python',
85194
'Substitution for {python} placeholder',

0 commit comments

Comments
 (0)