|
1 | | -"""Enhance ``run()`` placeholder substitutions to honor configuration defaults |
| 1 | +"""Enhance datalad-core's ``run()`` |
| 2 | +
|
| 3 | +Portable path handling logic for run-records |
| 4 | +-------------------------------------------- |
| 5 | +
|
| 6 | +Placeholder substitutions to honor configuration defaults |
| 7 | +--------------------------------------------------------- |
2 | 8 |
|
3 | 9 | Previously, ``run()`` would not recognize configuration defaults for |
4 | 10 | placeholder substitution. This means that any placeholders globally declared in |
|
19 | 25 | """ |
20 | 26 |
|
21 | 27 | from itertools import filterfalse |
| 28 | +from os.path import lexists |
| 29 | +from pathlib import ( |
| 30 | + PurePath, |
| 31 | + PureWindowsPath, |
| 32 | + PurePosixPath, |
| 33 | +) |
22 | 34 | import sys |
23 | 35 |
|
24 | 36 | from datalad.core.local.run import ( |
25 | 37 | GlobbedPaths, |
26 | 38 | SequenceFormatter, |
27 | 39 | normalize_command, |
28 | 40 | quote_cmdlinearg, |
| 41 | + _create_record as _orig_create_record, |
29 | 42 | ) |
| 43 | +from datalad.distribution.dataset import Dataset |
| 44 | +from datalad.local.rerun import get_run_info as _orig_get_run_info |
30 | 45 | from datalad.interface.common_cfg import definitions as cfg_defs |
31 | 46 | from datalad.support.constraints import EnsureStr |
32 | 47 | from datalad.support.extensions import register_config |
33 | 48 |
|
34 | 49 | from . import apply_patch |
35 | 50 |
|
36 | 51 |
|
| 52 | +# Deals with https://github.com/datalad/datalad/issues/7512 |
def _create_record(run_info, sidecar_flag, ds):
    """Convert input/output specifications to POSIX paths, then delegate.

    For each of the ``inputs`` and ``outputs`` keys present in ``run_info``,
    every entry is passed through ``_get_posix_relpath_for_runrecord()`` and
    the result is stored back into ``run_info`` (the mapping is modified in
    place). All arguments are then handed to the original
    ``_create_record()`` imported from ``datalad.core.local.run``, and its
    return value is returned unchanged.
    """
    present_keys = (key for key in ('inputs', 'outputs') if key in run_info)
    for key in present_keys:
        run_info[key] = list(
            map(_get_posix_relpath_for_runrecord, run_info[key])
        )

    return _orig_create_record(run_info, sidecar_flag, ds)
| 62 | + |
| 63 | + |
def _get_posix_relpath_for_runrecord(path):
    """Return a relative ``path`` in POSIX notation.

    ``path`` is interpreted using the conventions of the current platform.
    An absolute path is handed back exactly as it was received, because
    translating it to another platform's convention serves no purpose.
    Any other path is returned as a ``str`` in POSIX notation.
    """
    platform_path = PurePath(path)
    if not platform_path.is_absolute():
        return str(PurePosixPath(platform_path))

    # absolute: nothing to convert, return the caller's object as-is
    return path
| 73 | + |
| 74 | + |
| 75 | +# Deals with https://github.com/datalad/datalad/issues/7512 |
def get_run_info(dset, message):
    """Report run information with input/output paths in platform notation.

    Calls the original ``get_run_info()`` imported from
    ``datalad.local.rerun``. When it yields no run information (``None``),
    its result is passed through untouched. Otherwise each entry under the
    ``inputs`` and ``outputs`` keys (where present) is replaced, in place,
    by the outcome of ``_get_platform_path_from_runrecord()``.

    Returns the same ``(msg, run_info)`` pair as the original function.
    """
    msg, run_info = _orig_get_run_info(dset, message)

    if run_info is not None:
        for key in ('inputs', 'outputs'):
            if key in run_info:
                run_info[key] = [
                    _get_platform_path_from_runrecord(spec, dset)
                    for spec in run_info[key]
                ]

    return msg, run_info
| 88 | + |
| 89 | + |
def _get_platform_path_from_runrecord(path: str, ds: Dataset) -> str:
    """Helper to standardize run_info path handling

    Previously, run-records would contain platform-paths (e.g., windows paths
    when added on windows, POSIX paths elsewhere). This made cross-platform
    rerun impossible out-of-the-box, but it also means that such datasets are
    out there in unknown numbers.

    This helper inspects any input/output path reported by get_run_info()
    and tries to ensure that it matches platform conventions.

    Parameters
    ----------
    path: str
      A str-path from an input/output specification
    ds: Dataset
      This dataset's base path is used for existence testing for
      convention determination.

    Returns
    -------
    str
      ``path`` expressed in the conventions of the current platform. If
      anything goes wrong while determining the convention of ``path``,
      it is returned unchanged.
    """
    # we only need to act differently, when an incoming path is
    # windows. This is not possible to say with 100% confidence,
    # because a POSIX path can also contain a backslash. We support
    # a few standard cases where we CAN tell
    try:
        if '\\' in path and lexists(str(ds.pathobj / PureWindowsPath(path))):
            # there is a windows pathsep AND there is something on the
            # filesystem for this path when read as a windows path. We
            # can be reasonably sure that this is indeed a windows
            # path. This won't catch everything, but better than nothing
            pathobj = PureWindowsPath(path)
        else:
            # either no windows pathsep (no problem), or we have no idea
            # and no means to verify further hypotheses -- go with the
            # POSIX assumption and hope for the best
            pathobj = PurePosixPath(path)
    except Exception:
        # deliberate best-effort: never let convention-guessing break
        # the reporting of a run-record, hand back the original value
        return path

    # we report in platform-conventions
    return str(PurePath(pathobj))
| 139 | + |
| 140 | + |
37 | 141 | # This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37 |
38 | 142 | # datalad/core/local/run.py |
39 | 143 | # The change has been proposed in https://github.com/datalad/datalad/pull/7509 |
@@ -80,6 +184,11 @@ def not_subst(x): |
80 | 184 |
|
# Hand the replacement callables defined in this module to apply_patch(),
# one call per target: the placeholder-aware format_command(), the
# POSIX-converting _create_record(), and the platform-converting
# get_run_info().
apply_patch(
    'datalad.core.local.run',
    None,
    'format_command',
    format_command,
)
apply_patch(
    'datalad.core.local.run',
    None,
    '_create_record',
    _create_record,
)
apply_patch(
    'datalad.local.rerun',
    None,
    'get_run_info',
    get_run_info,
)
| 191 | + |
83 | 192 | register_config( |
84 | 193 | 'datalad.run.substitutions.python', |
85 | 194 | 'Substitution for {python} placeholder', |
|
0 commit comments