-
Notifications
You must be signed in to change notification settings - Fork 852
Add metaflow-diff to main CLI #2238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
#!/usr/bin/env python | ||
import os | ||
import shutil | ||
import sys | ||
from subprocess import PIPE, run | ||
from tempfile import TemporaryDirectory | ||
|
||
from metaflow._vendor import click | ||
from metaflow import Run, namespace | ||
from metaflow.cli import echo_always | ||
|
||
EXCLUSIONS = [ | ||
"metaflow/", | ||
"metaflow_extensions/", | ||
"INFO", | ||
"CONFIG_PARAMETERS", | ||
"conda.manifest", | ||
] | ||
|
||
|
||
def echo(line): | ||
echo_always(line, err=True, fg="magenta") | ||
|
||
|
||
def extract_code_package(runspec, exclusions): | ||
try: | ||
namespace(None) | ||
npow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
run = Run(runspec) | ||
echo(f"✅ Run *{runspec}* found, downloading code..") | ||
except: | ||
echo(f"❌ Run **{runspec}** not found") | ||
sys.exit(1) | ||
|
||
if run.code is None: | ||
echo( | ||
f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?" | ||
) | ||
sys.exit(1) | ||
|
||
tar = run.code.tarball | ||
members = [ | ||
m for m in tar.getmembers() if not any(m.name.startswith(x) for x in exclusions) | ||
npow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] | ||
|
||
tmp = TemporaryDirectory() | ||
tar.extractall(tmp.name, members) | ||
return tmp | ||
|
||
|
||
def perform_diff(source_dir, target_dir=None, output=False): | ||
if target_dir is None: | ||
target_dir = os.getcwd() | ||
|
||
diffs = [] | ||
for dirpath, dirnames, filenames in os.walk(source_dir): | ||
for fname in filenames: | ||
# NOTE: the paths below need to be set up carefully | ||
# for the `patch` command to work. Better not to touch | ||
# the directories below. If you must, test that patches | ||
# work after you changes. | ||
# | ||
# target_file is the git repo in the current working directory | ||
rel = os.path.relpath(dirpath, source_dir) | ||
target_file = os.path.join(rel, fname) | ||
# source_file is the run file loaded in a tmp directory | ||
source_file = os.path.join(dirpath, fname) | ||
|
||
if sys.stdout.isatty() and not output: | ||
color = ["--color"] | ||
else: | ||
color = ["--no-color"] | ||
|
||
if os.path.exists(os.path.join(target_dir, target_file)): | ||
cmd = ( | ||
["git", "diff", "--no-index", "--exit-code"] | ||
+ color | ||
+ [ | ||
target_file, | ||
source_file, | ||
] | ||
) | ||
result = run(cmd, text=True, stdout=PIPE, cwd=target_dir) | ||
if result.returncode == 0: | ||
echo(f"✅ {target_file} is identical, skipping") | ||
npow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
continue | ||
|
||
if output: | ||
diffs.append(result.stdout) | ||
else: | ||
run(["less", "-R"], input=result.stdout, text=True) | ||
else: | ||
echo(f"❗ {target_file} not in the target directory, skipping") | ||
return diffs if output else None | ||
|
||
|
||
def run_op(runspec, op, op_args): | ||
tmp = None | ||
try: | ||
tmp = extract_code_package(runspec, EXCLUSIONS) | ||
op(tmp.name, **op_args) | ||
finally: | ||
if tmp and os.path.exists(tmp.name): | ||
shutil.rmtree(tmp.name) | ||
|
||
|
||
def run_op_diff_runs(source_run, target_run): | ||
source_tmp = None | ||
target_tmp = None | ||
try: | ||
source_tmp = extract_code_package(source_run, EXCLUSIONS) | ||
target_tmp = extract_code_package(target_run, EXCLUSIONS) | ||
perform_diff(source_tmp.name, target_tmp.name) | ||
finally: | ||
if source_tmp and os.path.exists(source_tmp.name): | ||
shutil.rmtree(source_tmp.name) | ||
if target_tmp and os.path.exists(target_tmp.name): | ||
shutil.rmtree(target_tmp.name) | ||
|
||
|
||
def op_diff(tmpdir): | ||
perform_diff(tmpdir) | ||
|
||
|
||
def op_pull(tmpdir, dst=None): | ||
if os.path.exists(dst): | ||
echo(f"❌ Directory *{dst}* already exists") | ||
else: | ||
shutil.move(tmpdir, dst) | ||
echo(f"Code downloaded to *{dst}*") | ||
|
||
|
||
def op_patch(tmpdir, dst=None): | ||
diffs = perform_diff(tmpdir, output=True) | ||
with open(dst, "w") as f: | ||
for out in diffs: | ||
out = out.replace(tmpdir, "/.") | ||
out = out.replace("+++ b/./", "+++ b/") | ||
out = out.replace("--- b/./", "--- b/") | ||
out = out.replace("--- a/./", "--- a/") | ||
out = out.replace("+++ a/./", "+++ a/") | ||
f.write(out) | ||
echo(f"Patch saved in *{dst}*") | ||
path = run( | ||
["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE | ||
).stdout.strip() | ||
if path: | ||
diropt = f" --directory={path.rstrip('/')}" | ||
else: | ||
diropt = "" | ||
echo("Apply the patch by running:") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. echo and echo_always are a bit confusing here (one calls the other with some foreground, etc). Also, not sure if we want to respect the quiet flag? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is your suggestion? get rid of |
||
echo_always( | ||
f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True | ||
) | ||
|
||
|
||
def register_commands(main): | ||
@main.command() | ||
@click.argument("metaflow_run") | ||
def diff(metaflow_run=None): | ||
""" | ||
Do a 'git diff' of the current directory and a Metaflow run. | ||
""" | ||
run_op(metaflow_run, op_diff, {}) | ||
|
||
@main.command() | ||
@click.argument("source_run") | ||
@click.argument("target_run") | ||
def diff_runs(source_run, target_run): | ||
""" | ||
Do a 'git diff' between two Metaflow runs. | ||
""" | ||
run_op_diff_runs(source_run, target_run) | ||
|
||
@main.command() | ||
@click.argument("metaflow_run") | ||
@click.option( | ||
"--dir", help="Destination directory (default: {runspec}_code)", default=None | ||
) | ||
def pull(metaflow_run=None, dir=None): | ||
""" | ||
Pull the code of a Metaflow run. | ||
""" | ||
if dir is None: | ||
dir = metaflow_run.lower().replace("/", "_") + "_code" | ||
run_op(metaflow_run, op_pull, {"dst": dir}) | ||
|
||
@main.command() | ||
@click.argument("metaflow_run") | ||
@click.option( | ||
"--file", help="Patch file name (default: {runspec}.patch", default=None | ||
) | ||
def patch(metaflow_run, file=None): | ||
""" | ||
Create a patch file for the current dir with a Metaflow run. | ||
""" | ||
if file is None: | ||
file = metaflow_run.lower().replace("/", "_") + ".patch" | ||
run_op(metaflow_run, op_patch, {"dst": file}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import os | ||
import pytest | ||
import tempfile | ||
from subprocess import PIPE | ||
from unittest.mock import MagicMock, patch | ||
|
||
from metaflow.cmd.diff import ( | ||
EXCLUSIONS, | ||
extract_code_package, | ||
op_diff, | ||
op_patch, | ||
op_pull, | ||
perform_diff, | ||
run_op, | ||
) | ||
|
||
|
||
class TestMetaflowDiff: | ||
|
||
@patch("metaflow.cmd.diff.Run") | ||
@patch("metaflow.cmd.diff.namespace") | ||
def test_extract_code_package(self, mock_namespace, mock_run): | ||
mock_run.return_value.code.tarball.getmembers.return_value = [] | ||
mock_run.return_value.code.tarball.extractall = MagicMock() | ||
runspec = "HelloFlow/3" | ||
|
||
with patch( | ||
"tempfile.TemporaryDirectory", return_value=tempfile.TemporaryDirectory() | ||
) as mock_tmp: | ||
tmp = extract_code_package(runspec, EXCLUSIONS) | ||
|
||
mock_namespace.assert_called_once_with(None) | ||
mock_run.assert_called_once_with(runspec) | ||
assert os.path.exists(tmp.name) | ||
|
||
@pytest.mark.parametrize("use_tty", [True, False]) | ||
@patch("metaflow.cmd.diff.run") | ||
@patch("sys.stdout.isatty") | ||
def test_perform_diff_output_false(self, mock_isatty, mock_run, use_tty): | ||
mock_isatty.return_value = use_tty | ||
|
||
mock_process = MagicMock() | ||
mock_process.returncode = 1 | ||
mock_process.stdout = ( | ||
"--- a/file.txt\n" | ||
"+++ b/file.txt\n" | ||
"@@ -1 +1 @@\n" | ||
"-source content\n" | ||
"+target content\n" | ||
) | ||
mock_run.return_value = mock_process | ||
|
||
with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir: | ||
source_file = os.path.join(source_dir, "file.txt") | ||
target_file = os.path.join(target_dir, "file.txt") | ||
with open(source_file, "w") as f: | ||
f.write("source content") | ||
with open(target_file, "w") as f: | ||
f.write("target content") | ||
|
||
perform_diff(source_dir, target_dir, output=False) | ||
|
||
# if output=False, run should be called twice: | ||
# 1. git diff | ||
# 2. less -R | ||
assert mock_run.call_count == 2 | ||
|
||
mock_run.assert_any_call( | ||
[ | ||
"git", | ||
"diff", | ||
"--no-index", | ||
"--exit-code", | ||
"--color" if use_tty else "--no-color", | ||
"./file.txt", | ||
source_file, | ||
], | ||
text=True, | ||
stdout=PIPE, | ||
cwd=target_dir, | ||
) | ||
|
||
mock_run.assert_any_call( | ||
["less", "-R"], input=mock_process.stdout, text=True | ||
) | ||
|
||
@patch("metaflow.cmd.diff.run") | ||
def test_perform_diff_output_true(self, mock_run): | ||
with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir: | ||
source_file = os.path.join(source_dir, "file.txt") | ||
target_file = os.path.join(target_dir, "file.txt") | ||
with open(source_file, "w") as f: | ||
f.write("source content") | ||
with open(target_file, "w") as f: | ||
f.write("target content") | ||
|
||
perform_diff(source_dir, target_dir, output=True) | ||
|
||
assert mock_run.call_count == 1 | ||
|
||
mock_run.assert_called_once_with( | ||
[ | ||
"git", | ||
"diff", | ||
"--no-index", | ||
"--exit-code", | ||
"--no-color", | ||
"./file.txt", | ||
source_file, | ||
], | ||
text=True, | ||
stdout=PIPE, | ||
cwd=target_dir, | ||
) | ||
|
||
@patch("shutil.rmtree") | ||
@patch("metaflow.cmd.diff.extract_code_package") | ||
@patch("metaflow.cmd.diff.op_diff") | ||
def test_run_op(self, mock_op_diff, mock_extract_code_package, mock_rmtree): | ||
mock_tmp = tempfile.TemporaryDirectory() | ||
mock_extract_code_package.return_value = mock_tmp | ||
runspec = "HelloFlow/3" | ||
|
||
run_op(runspec, mock_op_diff, {}) | ||
|
||
mock_extract_code_package.assert_called_once_with(runspec, EXCLUSIONS) | ||
mock_op_diff.assert_called_once_with(mock_tmp.name) | ||
|
||
mock_rmtree.assert_any_call(mock_tmp.name) | ||
|
||
@patch("metaflow.cmd.diff.perform_diff") | ||
def test_op_patch(self, mock_perform_diff): | ||
mock_perform_diff.return_value = ["diff --git a/file.txt b/file.txt\n"] | ||
|
||
with tempfile.TemporaryDirectory() as tmpdir: | ||
patch_file = os.path.join(tmpdir, "patch.patch") | ||
|
||
op_patch(tmpdir, patch_file) | ||
|
||
mock_perform_diff.assert_called_once_with(tmpdir, output=True) | ||
with open(patch_file, "r") as f: | ||
content = f.read() | ||
assert "diff --git a/file.txt b/file.txt\n" in content | ||
|
||
|
||
if __name__ == "__main__": | ||
pytest.main([__file__]) |
Uh oh!
There was an error while loading. Please reload this page.