Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metaflow-diff to main CLI #2238

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 198 additions & 0 deletions metaflow/cmd/diff/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#!/usr/bin/env python
import os
import shutil
import sys
from subprocess import PIPE, run
from tempfile import TemporaryDirectory

from metaflow._vendor import click
from metaflow import Run, namespace
from metaflow.cli import echo_always

EXCLUSIONS = [
"metaflow/",
"metaflow_extensions/",
"INFO",
"CONFIG_PARAMETERS",
"conda.manifest",
]


def echo(line):
echo_always(line, err=True, fg="magenta")


def extract_code_package(runspec, exclusions):
try:
namespace(None)
run = Run(runspec)
echo(f"✅ Run *{runspec}* found, downloading code..")
except:
echo(f"❌ Run **{runspec}** not found")
sys.exit(1)

if run.code is None:
echo(
f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
)
sys.exit(1)

tar = run.code.tarball
members = [
m for m in tar.getmembers() if not any(m.name.startswith(x) for x in exclusions)
]

tmp = TemporaryDirectory()
tar.extractall(tmp.name, members)
return tmp


def perform_diff(source_dir, target_dir=None, output=False):
if target_dir is None:
target_dir = os.getcwd()

diffs = []
for dirpath, dirnames, filenames in os.walk(source_dir):
for fname in filenames:
# NOTE: the paths below need to be set up carefully
# for the `patch` command to work. Better not to touch
# the directories below. If you must, test that patches
# work after you changes.
#
# target_file is the git repo in the current working directory
rel = os.path.relpath(dirpath, source_dir)
target_file = os.path.join(rel, fname)
# source_file is the run file loaded in a tmp directory
source_file = os.path.join(dirpath, fname)

if sys.stdout.isatty() and not output:
color = ["--color"]
else:
color = ["--no-color"]

if os.path.exists(os.path.join(target_dir, target_file)):
cmd = (
["git", "diff", "--no-index", "--exit-code"]
+ color
+ [
target_file,
source_file,
]
)
result = run(cmd, text=True, stdout=PIPE, cwd=target_dir)
if result.returncode == 0:
echo(f"✅ {target_file} is identical, skipping")
continue

if output:
diffs.append(result.stdout)
else:
run(["less", "-R"], input=result.stdout, text=True)
else:
echo(f"❗ {target_file} not in the target directory, skipping")
return diffs if output else None


def run_op(runspec, op, op_args):
tmp = None
try:
tmp = extract_code_package(runspec, EXCLUSIONS)
op(tmp.name, **op_args)
finally:
if tmp and os.path.exists(tmp.name):
shutil.rmtree(tmp.name)


def run_op_diff_runs(source_run, target_run):
source_tmp = None
target_tmp = None
try:
source_tmp = extract_code_package(source_run, EXCLUSIONS)
target_tmp = extract_code_package(target_run, EXCLUSIONS)
perform_diff(source_tmp.name, target_tmp.name)
finally:
if source_tmp and os.path.exists(source_tmp.name):
shutil.rmtree(source_tmp.name)
if target_tmp and os.path.exists(target_tmp.name):
shutil.rmtree(target_tmp.name)


def op_diff(tmpdir):
perform_diff(tmpdir)


def op_pull(tmpdir, dst=None):
if os.path.exists(dst):
echo(f"❌ Directory *{dst}* already exists")
else:
shutil.move(tmpdir, dst)
echo(f"Code downloaded to *{dst}*")


def op_patch(tmpdir, dst=None):
diffs = perform_diff(tmpdir, output=True)
with open(dst, "w") as f:
for out in diffs:
out = out.replace(tmpdir, "/.")
out = out.replace("+++ b/./", "+++ b/")
out = out.replace("--- b/./", "--- b/")
out = out.replace("--- a/./", "--- a/")
out = out.replace("+++ a/./", "+++ a/")
f.write(out)
echo(f"Patch saved in *{dst}*")
path = run(
["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE
).stdout.strip()
if path:
diropt = f" --directory={path.rstrip('/')}"
else:
diropt = ""
echo("Apply the patch by running:")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

echo and echo_always are a bit confusing here (one calls the other with some foreground, etc). Also, not sure if we want to respect the quiet flag?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is your suggestion? get rid of echo?

echo_always(
f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True
)


def register_commands(main):
@main.command()
@click.argument("metaflow_run")
def diff(metaflow_run=None):
"""
Do a 'git diff' of the current directory and a Metaflow run.
"""
run_op(metaflow_run, op_diff, {})

@main.command()
@click.argument("source_run")
@click.argument("target_run")
def diff_runs(source_run, target_run):
"""
Do a 'git diff' between two Metaflow runs.
"""
run_op_diff_runs(source_run, target_run)

@main.command()
@click.argument("metaflow_run")
@click.option(
"--dir", help="Destination directory (default: {runspec}_code)", default=None
)
def pull(metaflow_run=None, dir=None):
"""
Pull the code of a Metaflow run.
"""
if dir is None:
dir = metaflow_run.lower().replace("/", "_") + "_code"
run_op(metaflow_run, op_pull, {"dst": dir})

@main.command()
@click.argument("metaflow_run")
@click.option(
"--file", help="Patch file name (default: {runspec}.patch", default=None
)
def patch(metaflow_run, file=None):
"""
Create a patch file for the current dir with a Metaflow run.
"""
if file is None:
file = metaflow_run.lower().replace("/", "_") + ".patch"
run_op(metaflow_run, op_patch, {"dst": file})
2 changes: 2 additions & 0 deletions metaflow/cmd/main_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from metaflow.plugins.datastores.local_storage import LocalStorage
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONTACT_INFO
from metaflow.metaflow_version import get_version
from metaflow.cmd import diff

from .util import echo_always
import metaflow.tracing as tracing
Expand Down Expand Up @@ -70,6 +71,7 @@ def status():
]

process_cmds(globals())
diff.register_commands(main)


@click.command(
Expand Down
147 changes: 147 additions & 0 deletions test/cmd/diff/test_metaflow_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import os
import pytest
import tempfile
from subprocess import PIPE
from unittest.mock import MagicMock, patch

from metaflow.cmd.diff import (
EXCLUSIONS,
extract_code_package,
op_diff,
op_patch,
op_pull,
perform_diff,
run_op,
)


class TestMetaflowDiff:

@patch("metaflow.cmd.diff.Run")
@patch("metaflow.cmd.diff.namespace")
def test_extract_code_package(self, mock_namespace, mock_run):
mock_run.return_value.code.tarball.getmembers.return_value = []
mock_run.return_value.code.tarball.extractall = MagicMock()
runspec = "HelloFlow/3"

with patch(
"tempfile.TemporaryDirectory", return_value=tempfile.TemporaryDirectory()
) as mock_tmp:
tmp = extract_code_package(runspec, EXCLUSIONS)

mock_namespace.assert_called_once_with(None)
mock_run.assert_called_once_with(runspec)
assert os.path.exists(tmp.name)

@pytest.mark.parametrize("use_tty", [True, False])
@patch("metaflow.cmd.diff.run")
@patch("sys.stdout.isatty")
def test_perform_diff_output_false(self, mock_isatty, mock_run, use_tty):
mock_isatty.return_value = use_tty

mock_process = MagicMock()
mock_process.returncode = 1
mock_process.stdout = (
"--- a/file.txt\n"
"+++ b/file.txt\n"
"@@ -1 +1 @@\n"
"-source content\n"
"+target content\n"
)
mock_run.return_value = mock_process

with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir:
source_file = os.path.join(source_dir, "file.txt")
target_file = os.path.join(target_dir, "file.txt")
with open(source_file, "w") as f:
f.write("source content")
with open(target_file, "w") as f:
f.write("target content")

perform_diff(source_dir, target_dir, output=False)

# if output=False, run should be called twice:
# 1. git diff
# 2. less -R
assert mock_run.call_count == 2

mock_run.assert_any_call(
[
"git",
"diff",
"--no-index",
"--exit-code",
"--color" if use_tty else "--no-color",
"./file.txt",
source_file,
],
text=True,
stdout=PIPE,
cwd=target_dir,
)

mock_run.assert_any_call(
["less", "-R"], input=mock_process.stdout, text=True
)

@patch("metaflow.cmd.diff.run")
def test_perform_diff_output_true(self, mock_run):
with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir:
source_file = os.path.join(source_dir, "file.txt")
target_file = os.path.join(target_dir, "file.txt")
with open(source_file, "w") as f:
f.write("source content")
with open(target_file, "w") as f:
f.write("target content")

perform_diff(source_dir, target_dir, output=True)

assert mock_run.call_count == 1

mock_run.assert_called_once_with(
[
"git",
"diff",
"--no-index",
"--exit-code",
"--no-color",
"./file.txt",
source_file,
],
text=True,
stdout=PIPE,
cwd=target_dir,
)

@patch("shutil.rmtree")
@patch("metaflow.cmd.diff.extract_code_package")
@patch("metaflow.cmd.diff.op_diff")
def test_run_op(self, mock_op_diff, mock_extract_code_package, mock_rmtree):
mock_tmp = tempfile.TemporaryDirectory()
mock_extract_code_package.return_value = mock_tmp
runspec = "HelloFlow/3"

run_op(runspec, mock_op_diff, {})

mock_extract_code_package.assert_called_once_with(runspec, EXCLUSIONS)
mock_op_diff.assert_called_once_with(mock_tmp.name)

mock_rmtree.assert_any_call(mock_tmp.name)

@patch("metaflow.cmd.diff.perform_diff")
def test_op_patch(self, mock_perform_diff):
mock_perform_diff.return_value = ["diff --git a/file.txt b/file.txt\n"]

with tempfile.TemporaryDirectory() as tmpdir:
patch_file = os.path.join(tmpdir, "patch.patch")

op_patch(tmpdir, patch_file)

mock_perform_diff.assert_called_once_with(tmpdir, output=True)
with open(patch_file, "r") as f:
content = f.read()
assert "diff --git a/file.txt b/file.txt\n" in content


if __name__ == "__main__":
pytest.main([__file__])
Loading