From 97efb3fdbf3814579a6c8f8da0ad05e5190bf1b4 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Wed, 5 Feb 2025 01:40:01 -0800 Subject: [PATCH 1/3] WIP: Add the ability to package more libraries --- metaflow/client/core.py | 8 +- metaflow/extension_support/__init__.py | 2 +- metaflow/extension_support/_empty_file.py | 4 +- metaflow/info_file.py | 25 - metaflow/metaflow_config.py | 10 +- metaflow/metaflow_version.py | 2 +- metaflow/package.py | 203 -------- metaflow/package/__init__.py | 157 +++++++ metaflow/package/mfenv.py | 512 +++++++++++++++++++++ metaflow/plugins/package_cli.py | 14 +- metaflow/plugins/pypi/conda_decorator.py | 15 +- metaflow/special_files.py | 41 ++ metaflow/user_configs/config_options.py | 5 +- metaflow/user_configs/config_parameters.py | 4 - metaflow/util.py | 5 +- 15 files changed, 754 insertions(+), 253 deletions(-) delete mode 100644 metaflow/info_file.py delete mode 100644 metaflow/package.py create mode 100644 metaflow/package/__init__.py create mode 100644 metaflow/package/mfenv.py create mode 100644 metaflow/special_files.py diff --git a/metaflow/client/core.py b/metaflow/client/core.py index 4edbcdac00c..4d3ea0676f8 100644 --- a/metaflow/client/core.py +++ b/metaflow/client/core.py @@ -32,11 +32,12 @@ from metaflow.includefile import IncludedFile from metaflow.metaflow_config import DEFAULT_METADATA, MAX_ATTEMPTS from metaflow.metaflow_environment import MetaflowEnvironment +from metaflow.package.mfenv import MFEnv from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS +from metaflow.special_files import SpecialFile from metaflow.unbounded_foreach import CONTROL_TASK_TAG from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode -from ..info_file import INFO_FILE from .filecache import FileCache if TYPE_CHECKING: @@ -824,10 +825,7 @@ def __init__(self, flow_name: str, code_package: str): ) code_obj = BytesIO(blobdata) self._tar = tarfile.open(fileobj=code_obj, mode="r:gz") - # The JSON module in 
Python3 deals with Unicode. Tar gives bytes. - info_str = ( - self._tar.extractfile(os.path.basename(INFO_FILE)).read().decode("utf-8") - ) + info_str = MFEnv.get_archive_content(self._tar, SpecialFile.INFO_FILE) self._info = json.loads(info_str) self._flowspec = self._tar.extractfile(self._info["script"]).read() diff --git a/metaflow/extension_support/__init__.py b/metaflow/extension_support/__init__.py index 0cc9e00afae..5de8de66929 100644 --- a/metaflow/extension_support/__init__.py +++ b/metaflow/extension_support/__init__.py @@ -12,7 +12,7 @@ from itertools import chain from pathlib import Path -from metaflow.info_file import read_info_file +from metaflow.special_files import read_info_file # diff --git a/metaflow/extension_support/_empty_file.py b/metaflow/extension_support/_empty_file.py index d59e1556ddb..dbdcba34c17 100644 --- a/metaflow/extension_support/_empty_file.py +++ b/metaflow/extension_support/_empty_file.py @@ -1,2 +1,2 @@ -# This file serves as a __init__.py for metaflow_extensions when it is packaged -# and needs to remain empty. +# This file serves as a __init__.py for metaflow_extensions or metaflow +# packages when they are packaged and needs to remain empty. 
diff --git a/metaflow/info_file.py b/metaflow/info_file.py deleted file mode 100644 index 6d56a6152ba..00000000000 --- a/metaflow/info_file.py +++ /dev/null @@ -1,25 +0,0 @@ -import json - -from os import path - -CURRENT_DIRECTORY = path.dirname(path.abspath(__file__)) -INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO") - -_info_file_content = None -_info_file_present = None - - -def read_info_file(): - global _info_file_content - global _info_file_present - if _info_file_present is None: - _info_file_present = path.exists(INFO_FILE) - if _info_file_present: - try: - with open(INFO_FILE, "r", encoding="utf-8") as contents: - _info_file_content = json.load(contents) - except IOError: - pass - if _info_file_present: - return _info_file_content - return None diff --git a/metaflow/metaflow_config.py b/metaflow/metaflow_config.py index 79a42620431..01092158cc8 100644 --- a/metaflow/metaflow_config.py +++ b/metaflow/metaflow_config.py @@ -446,7 +446,15 @@ ### # Debug configuration ### -DEBUG_OPTIONS = ["subcommand", "sidecar", "s3client", "tracing", "stubgen", "userconf"] +DEBUG_OPTIONS = [ + "subcommand", + "sidecar", + "s3client", + "tracing", + "stubgen", + "userconf", + "package", +] for typ in DEBUG_OPTIONS: vars()["DEBUG_%s" % typ.upper()] = from_conf("DEBUG_%s" % typ.upper(), False) diff --git a/metaflow/metaflow_version.py b/metaflow/metaflow_version.py index e3be8ed7956..92dab210bdf 100644 --- a/metaflow/metaflow_version.py +++ b/metaflow/metaflow_version.py @@ -11,7 +11,7 @@ from os import path, name, environ, listdir from metaflow.extension_support import update_package_info -from metaflow.info_file import CURRENT_DIRECTORY, read_info_file +from metaflow.special_files import read_info_file # True/False correspond to the value `public`` in get_version diff --git a/metaflow/package.py b/metaflow/package.py deleted file mode 100644 index 1385883d5a7..00000000000 --- a/metaflow/package.py +++ /dev/null @@ -1,203 +0,0 @@ -import importlib -import os 
-import sys -import tarfile -import time -import json -from io import BytesIO - -from .user_configs.config_parameters import CONFIG_FILE, dump_config_values -from .extension_support import EXT_PKG, package_mfext_all -from .metaflow_config import DEFAULT_PACKAGE_SUFFIXES -from .exception import MetaflowException -from .util import to_unicode -from . import R -from .info_file import INFO_FILE - -DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",") -METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"] - - -class NonUniqueFileNameToFilePathMappingException(MetaflowException): - headline = "Non Unique file path for a file name included in code package" - - def __init__(self, filename, file_paths, lineno=None): - msg = ( - "Filename %s included in the code package includes multiple different paths for the same name : %s.\n" - "The `filename` in the `add_to_package` decorator hook requires a unique `file_path` to `file_name` mapping" - % (filename, ", ".join(file_paths)) - ) - super().__init__(msg=msg, lineno=lineno) - - -# this is os.walk(follow_symlinks=True) with cycle detection -def walk_without_cycles(top_root): - seen = set() - - def _recurse(root): - for parent, dirs, files in os.walk(root): - for d in dirs: - path = os.path.join(parent, d) - if os.path.islink(path): - # Breaking loops: never follow the same symlink twice - # - # NOTE: this also means that links to sibling links are - # not followed. 
In this case: - # - # x -> y - # y -> oo - # oo/real_file - # - # real_file is only included twice, not three times - reallink = os.path.realpath(path) - if reallink not in seen: - seen.add(reallink) - for x in _recurse(path): - yield x - yield parent, files - - for x in _recurse(top_root): - yield x - - -class MetaflowPackage(object): - def __init__(self, flow, environment, echo, suffixes=DEFAULT_SUFFIXES_LIST): - self.suffixes = list(set().union(suffixes, DEFAULT_SUFFIXES_LIST)) - self.environment = environment - self.metaflow_root = os.path.dirname(__file__) - - self.flow_name = flow.name - self._flow = flow - self.create_time = time.time() - environment.init_environment(echo) - for step in flow: - for deco in step.decorators: - deco.package_init(flow, step.__name__, environment) - self.blob = self._make() - - def _walk(self, root, exclude_hidden=True, suffixes=None): - if suffixes is None: - suffixes = [] - root = to_unicode(root) # handle files/folder with non ascii chars - prefixlen = len("%s/" % os.path.dirname(root)) - for ( - path, - files, - ) in walk_without_cycles(root): - if exclude_hidden and "/." in path: - continue - # path = path[2:] # strip the ./ prefix - # if path and (path[0] == '.' or './' in path): - # continue - for fname in files: - if (fname[0] == "." and fname in suffixes) or ( - fname[0] != "." - and any(fname.endswith(suffix) for suffix in suffixes) - ): - p = os.path.join(path, fname) - yield p, p[prefixlen:] - - def path_tuples(self): - """ - Returns list of (path, arcname) to be added to the job package, where - `arcname` is the alternative name for the file in the package. 
- """ - # We want the following contents in the tarball - # Metaflow package itself - for path_tuple in self._walk( - self.metaflow_root, exclude_hidden=False, suffixes=METAFLOW_SUFFIXES_LIST - ): - yield path_tuple - - # Metaflow extensions; for now, we package *all* extensions but this may change - # at a later date; it is possible to call `package_mfext_package` instead of - # `package_mfext_all` but in that case, make sure to also add a - # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions - # package and prevent other extensions from being loaded that may be - # present in the rest of the system - for path_tuple in package_mfext_all(): - yield path_tuple - - # Any custom packages exposed via decorators - deco_module_paths = {} - for step in self._flow: - for deco in step.decorators: - for path_tuple in deco.add_to_package(): - file_path, file_name = path_tuple - # Check if the path is not duplicated as - # many steps can have the same packages being imported - if file_name not in deco_module_paths: - deco_module_paths[file_name] = file_path - yield path_tuple - elif deco_module_paths[file_name] != file_path: - raise NonUniqueFileNameToFilePathMappingException( - file_name, [deco_module_paths[file_name], file_path] - ) - - # the package folders for environment - for path_tuple in self.environment.add_to_package(): - yield path_tuple - if R.use_r(): - # the R working directory - for path_tuple in self._walk( - "%s/" % R.working_dir(), suffixes=self.suffixes - ): - yield path_tuple - # the R package - for path_tuple in R.package_paths(): - yield path_tuple - else: - # the user's working directory - flowdir = os.path.dirname(os.path.abspath(sys.argv[0])) + "/" - for path_tuple in self._walk(flowdir, suffixes=self.suffixes): - yield path_tuple - - def _add_configs(self, tar): - buf = BytesIO() - buf.write(json.dumps(dump_config_values(self._flow)).encode("utf-8")) - self._add_file(tar, os.path.basename(CONFIG_FILE), buf) - - def 
_add_info(self, tar): - buf = BytesIO() - buf.write( - json.dumps( - self.environment.get_environment_info(include_ext_info=True) - ).encode("utf-8") - ) - self._add_file(tar, os.path.basename(INFO_FILE), buf) - - @staticmethod - def _add_file(tar, filename, buf): - info = tarfile.TarInfo(filename) - buf.seek(0) - info.size = len(buf.getvalue()) - # Setting this default to Dec 3, 2019 - info.mtime = 1575360000 - tar.addfile(info, buf) - - def _make(self): - def no_mtime(tarinfo): - # a modification time change should not change the hash of - # the package. Only content modifications will. - # Setting this default to Dec 3, 2019 - tarinfo.mtime = 1575360000 - return tarinfo - - buf = BytesIO() - with tarfile.open( - fileobj=buf, mode="w:gz", compresslevel=3, dereference=True - ) as tar: - self._add_info(tar) - self._add_configs(tar) - for path, arcname in self.path_tuples(): - tar.add(path, arcname=arcname, recursive=False, filter=no_mtime) - - blob = bytearray(buf.getvalue()) - blob[4:8] = [0] * 4 # Reset 4 bytes from offset 4 to account for ts - return blob - - def __str__(self): - return "" % ( - self.flow_name, - time.strftime("%a, %d %b %Y %H:%M:%S", self.create_time), - ) diff --git a/metaflow/package/__init__.py b/metaflow/package/__init__.py new file mode 100644 index 00000000000..74c763bd536 --- /dev/null +++ b/metaflow/package/__init__.py @@ -0,0 +1,157 @@ +import os +import sys +import tarfile +import time +import json +from io import BytesIO + + +from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES +from ..exception import MetaflowException +from ..special_files import SpecialFile +from ..user_configs.config_parameters import dump_config_values +from .. 
import R + +from .mfenv import MFEnv + +DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",") + + +class NonUniqueFileNameToFilePathMappingException(MetaflowException): + headline = "Non-unique file path for a file name included in code package" + + def __init__(self, filename, file_paths, lineno=None): + msg = ( + "Filename %s included in the code package includes multiple different " + "paths for the same name : %s.\n" + "The `filename` in the `add_to_package` decorator hook requires a unique " + "`file_path` to `file_name` mapping" % (filename, ", ".join(file_paths)) + ) + super().__init__(msg=msg, lineno=lineno) + + +class MetaflowPackage(object): + def __init__(self, flow, environment, echo, suffixes=DEFAULT_SUFFIXES_LIST): + self.suffixes = list(set().union(suffixes, DEFAULT_SUFFIXES_LIST)) + self.environment = environment + self.metaflow_root = os.path.dirname(__file__) + + self.flow_name = flow.name + self._flow = flow + self.create_time = time.time() + environment.init_environment(echo) + for step in flow: + for deco in step.decorators: + deco.package_init(flow, step.__name__, environment) + + self._code_env = MFEnv(lambda x: hasattr(x, "METAFLOW_PACKAGE")) + + # Add special content + self._code_env.add_special_content( + SpecialFile.INFO_FILE, + json.dumps( + self.environment.get_environment_info(include_ext_info=True) + ).encode("utf-8"), + ) + + self._code_env.add_special_content( + SpecialFile.CONFIG_FILE, + json.dumps(dump_config_values(self._flow)).encode("utf-8"), + ) + + # Add user files (from decorators) -- we add these to the code environment + self._code_env.add_files(self._addl_files()) + + self.blob = self._make() + + def path_tuples(self): + # Package the environment + for path, arcname in self._code_env.files(): + yield path, arcname + for _, arcname in self._code_env.contents(): + yield f"{arcname}", arcname + + # Package the user code + for path, arcname in self._user_code_tuples(): + yield path, arcname + + def _addl_files(self): + 
# Look at all decorators that provide additional files + deco_module_paths = {} + for step in self._flow: + for deco in step.decorators: + for path_tuple in deco.add_to_package(): + file_path, file_name = path_tuple + # Check if the path is not duplicated as + # many steps can have the same packages being imported + if file_name not in deco_module_paths: + deco_module_paths[file_name] = file_path + yield path_tuple + elif deco_module_paths[file_name] != file_path: + raise NonUniqueFileNameToFilePathMappingException( + file_name, [deco_module_paths[file_name], file_path] + ) + + # the package folders for environment + for path_tuple in self.environment.add_to_package(): + yield path_tuple + + def _user_code_tuples(self): + if R.use_r(): + # the R working directory + for path_tuple in MFEnv.walk( + "%s/" % R.working_dir(), suffixes=self.suffixes + ): + yield path_tuple + # the R package + for path_tuple in R.package_paths(): + yield path_tuple + else: + # the user's working directory + flowdir = os.path.dirname(os.path.abspath(sys.argv[0])) + "/" + for path_tuple in MFEnv.walk(flowdir, suffixes=self.suffixes): + # TODO: This is where we will check if the file is already included + # in the mfenv portion using path_in_archive. If it is, we just need to + # include a symlink. + yield path_tuple + + @staticmethod + def _add_file(tar, filename, buf): + info = tarfile.TarInfo(filename) + buf.seek(0) + info.size = len(buf.getvalue()) + # Setting this default to Dec 3, 2019 + info.mtime = 1575360000 + tar.addfile(info, buf) + + def _make(self): + def no_mtime(tarinfo): + # a modification time change should not change the hash of + # the package. Only content modifications will. 
+ # Setting this default to Dec 3, 2019 + tarinfo.mtime = 1575360000 + return tarinfo + + buf = BytesIO() + with tarfile.open( + fileobj=buf, mode="w:gz", compresslevel=3, dereference=True + ) as tar: + # Package the environment + for path, arcname in self._code_env.files(): + tar.add(path, arcname=arcname, recursive=False, filter=no_mtime) + for content, arcname in self._code_env.contents(): + self._add_file(tar, arcname, BytesIO(content)) + + # Package the user code + for path, arcname in self._user_code_tuples(): + tar.add(path, arcname=arcname, recursive=False, filter=no_mtime) + + blob = bytearray(buf.getvalue()) + blob[4:8] = [0] * 4 # Reset 4 bytes from offset 4 to account for ts + return blob + + def __str__(self): + return "" % ( + self.flow_name, + time.strftime("%a, %d %b %Y %H:%M:%S", self.create_time), + ) diff --git a/metaflow/package/mfenv.py b/metaflow/package/mfenv.py new file mode 100644 index 00000000000..4e08517302c --- /dev/null +++ b/metaflow/package/mfenv.py @@ -0,0 +1,512 @@ +import inspect +import os +import sys +import tarfile + +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from typing import ( + Callable, + Dict, + Generator, + Iterator, + List, + Mapping, + Optional, + Set, + Tuple, + TYPE_CHECKING, + Union, +) + +from types import ModuleType + + +from ..debug import debug +from ..extension_support import EXT_EXCLUDE_SUFFIXES, metadata, package_mfext_all + +from ..special_files import MFENV_DIR, SpecialFile +from ..util import get_metaflow_root, to_unicode + +packages_distributions = None + +if sys.version_info[:2] >= (3, 10): + packages_distributions = metadata.packages_distributions +else: + # This is the code present in 3.10+ -- we replicate here for other versions + def _packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. 
+ """ + pkg_to_dist = defaultdict(list) + for dist in metadata.distributions(): + for pkg in _top_level_declared(dist) or _top_level_inferred(dist): + pkg_to_dist[pkg].append(dist.metadata["Name"]) + return dict(pkg_to_dist) + + def _top_level_declared(dist: metadata.Distribution) -> List[str]: + return (dist.read_text("top_level.txt") or "").split() + + def _topmost(name: "pathlib.PurePosixPath") -> Optional[str]: + """ + Return the top-most parent as long as there is a parent. + """ + top, *rest = name.parts + return top if rest else None + + def _get_toplevel_name(name: "pathlib.PurePosixPath") -> str: + return _topmost(name) or ( + # python/typeshed#10328 + inspect.getmodulename(name) # type: ignore + or str(name) + ) + + def _top_level_inferred(dist: "metadata.Distribution"): + opt_names = set(map(_get_toplevel_name, dist.files or [])) + + def importable_name(name): + return "." not in name + + return filter(importable_name, opt_names) + + packages_distributions = _packages_distributions + + +if TYPE_CHECKING: + import pathlib + + +_cached_distributions = None + + +def modules_to_distributions() -> Dict[str, List[metadata.Distribution]]: + """ + Return a mapping of top-level modules to their distributions. + + Returns + ------- + Dict[str, List[metadata.Distribution]] + A mapping of top-level modules to their distributions. 
+ """ + global _cached_distributions + if _cached_distributions is None: + _cached_distributions = { + k: [metadata.distribution(d) for d in v] + for k, v in packages_distributions().items() + } + return _cached_distributions + + +@dataclass +class _ModuleInfo: + name: str + root_paths: Set[str] + module: ModuleType + + +class MFEnv: + + METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"] + + # this is os.walk(follow_symlinks=True) with cycle detection + @classmethod + def walk_without_cycles( + cls, + top_root: str, + ) -> Generator[Tuple[str, List[str]], None, None]: + seen = set() + + def _recurse(root): + for parent, dirs, files in os.walk(root): + for d in dirs: + path = os.path.join(parent, d) + if os.path.islink(path): + # Breaking loops: never follow the same symlink twice + # + # NOTE: this also means that links to sibling links are + # not followed. In this case: + # + # x -> y + # y -> oo + # oo/real_file + # + # real_file is only included twice, not three times + reallink = os.path.realpath(path) + if reallink not in seen: + seen.add(reallink) + for x in _recurse(path): + yield x + yield parent, files + + for x in _recurse(top_root): + yield x + + @classmethod + def walk( + cls, + root: str, + exclude_hidden: bool = True, + suffixes: Optional[List[str]] = None, + ) -> Generator[Tuple[str, str], None, None]: + if suffixes is None: + suffixes = [] + root = to_unicode(root) # handle files/folder with non ascii chars + prefixlen = len("%s/" % os.path.dirname(root)) + for ( + path, + files, + ) in cls.walk_without_cycles(root): + if exclude_hidden and "/." in path: + continue + # path = path[2:] # strip the ./ prefix + # if path and (path[0] == '.' or './' in path): + # continue + for fname in files: + if (fname[0] == "." and fname in suffixes) or ( + fname[0] != "." 
+ and any(fname.endswith(suffix) for suffix in suffixes) + ): + p = os.path.join(path, fname) + yield p, p[prefixlen:] + + @classmethod + def get_filename(cls, name: Union[SpecialFile, str]) -> Optional[str]: + # In all cases, the special files are siblings of the metaflow root + # directory. + if isinstance(name, SpecialFile): + r = get_metaflow_root() + path_to_file = os.path.join(r, name.value) + else: + path_to_file = os.path.join(MFENV_DIR, name) + if os.path.isfile(path_to_file): + return path_to_file + return None + + @classmethod + def get_content(cls, name: Union[SpecialFile, str]) -> Optional[str]: + file_to_read = cls.get_filename(name) + if file_to_read: + with open(file_to_read, "r", encoding="utf-8") as f: + return f.read() + return None + + @classmethod + def get_archive_filename( + cls, archive: tarfile.TarFile, name: Union[SpecialFile, str] + ) -> Optional[str]: + # Backward compatible way of accessing all special files. Prior to MFEnv, they + # were stored at the TL of the archive. + real_name = name.value if isinstance(name, SpecialFile) else name + if archive.getmember(MFENV_DIR): + file_path = os.path.join(MFENV_DIR, real_name) + else: + file_path = real_name + if archive.getmember(file_path): + return file_path + return None + + @classmethod + def get_archive_content( + cls, archive: tarfile.TarFile, name: Union[SpecialFile, str] + ) -> Optional[str]: + file_to_read = cls.get_archive_filename(archive, name) + if file_to_read: + with archive.extractfile(file_to_read) as f: + return f.read().decode("utf-8") + return None + + def __init__(self, criteria: Callable[[ModuleType], bool]) -> None: + # Look at top-level modules that are present when MFEnv is initialized + modules = filter(lambda x: "." 
not in x[0], sys.modules.items()) + + # Determine the version of Metaflow that we are part of + self._metaflow_root = get_metaflow_root() + + self._modules = { + name: _ModuleInfo( + name, + set(Path(p).resolve().as_posix() for p in getattr(mod, "__path__", [])), + mod, + ) + for name, mod in dict(modules).items() + } # type: Dict[str, Set[str]] + + # Filter the modules + self._modules = { + name: info for name, info in self._modules.items() if criteria(info.module) + } + + # Contain metadata information regarding the distributions packaged. + # This allows Metaflow to "fake" distribution information when packaged + self._metainfo = {} # type: Dict[str, Dict[str, str]] + + # Maps an absolute path on the filesystem to the path of the file in the + # archive. + self._files = {} # type: Dict[str, str] + + self._content = {} # type: Dict[SpecialFile, bytes] + + debug.package_exec(f"Used system modules found: {str(self._modules)}") + + # Populate with files from the third party modules + for k, v in self._modules.items(): + self._files.update(self._module_files(k, v.root_paths)) + + # We include Metaflow as well + self._files.update(self._metaflow_distribution_files()) + + # Include extensions as well + self._files.update(self._metaflow_extension_files()) + + @property + def root_dir(self): + return MFENV_DIR + + def add_special_content(self, name: SpecialFile, content: bytes) -> None: + """ + Add a special file to the MF environment. + + This file will be included in the resulting code package in `MFENV_DIR`. + + Parameters + ---------- + name : SpecialFile + The special file to add to the MF environment + content : bytes + The content of the special file + """ + debug.package_exec(f"Adding special content {name.value} to the MF environment") + self._content[name] = content + + def add_module(self, module: ModuleType) -> None: + """ + Add a module to the MF environment. + + This module will be included in the resulting code package in `MFENV_DIR`. 
+ + Parameters + ---------- + module : ModuleType + The module to include in the MF environment + """ + name = module.__name__ + debug.package_exec(f"Adding module {name} to the MF environment") + # If the module is a single file, we handle this here by looking at __file__ + # which will point to the single file. If it is an actual module, __path__ + # will contain the path(s) to the module + self._modules[name] = _ModuleInfo( + name, + set( + Path(p).resolve().as_posix() + for p in getattr(module, __path__, module.__file__) + ), + module, + ) + self._files.update(self._module_files(name, self._modules[name].root_paths)) + + def add_directory( + self, + directory: str, + criteria: Callable[[str], bool], + ) -> None: + """ + Add a directory to the MF environment. + + This directory will be included in the resulting code package in `MFENV_DIR`. + You can optionally specify a criteria function that takes a file path and + returns a boolean indicating whether or not the file should be included in the + code package. + + At runtime, the content of the directory will be accessible through the usual + PYTHONPATH mechanism but also through `current.envdir`. + + Parameters + ---------- + directory : str + The directory to include in the MF environment + criteria : Callable[[str], bool] + A function that takes a file path and returns a boolean indicating whether or + not the file should be included in the code package + """ + name = os.path.basename(directory) + debug.package_exec(f"Adding directory {directory} to the MF environment") + for root, _, files in os.walk(directory): + for file in files: + if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES): + continue + path = os.path.join(root, file) + relpath = os.path.relpath(path, directory) + path = os.path.realpath(path) + if criteria(path): + self._files[path] = os.path.join(name, relpath) + + def add_files(self, files: Iterator[Tuple[str, str]]) -> None: + """ + Add a list of files to the MF environment. 
+ + These files will be included in the resulting code package in `MFENV_DIR`. + + + Parameters + ---------- + files : Iterator[Tuple[str, str]] + A list of files to include in the MF environment. The first element of the + tuple is the path to the file in the filesystem; the second element is the + path in the archive. + """ + for file, arcname in files: + debug.package_exec(f"Adding file {file} as {arcname} to the MF environment") + self._files[os.path.realpath(file)] = os.path.join(MFENV_DIR, arcname) + + def path_in_archive(self, path: str) -> Optional[str]: + """ + Return the path of the file in the code package if it is included through + add_directory or add_files. + + Note that we will use realpath to determine if two paths are equal. + This includes all files included as part of third party libraries as well as + anything that was added as part of `add_files` and `add_directory`. + + Parameters + ---------- + path : str + The path of the file on the filesystem + + Returns + ------- + Optional[str] + The path of the file in the code package or None if the file is not included + """ + return self._files.get(os.path.realpath(path)) + + def files(self) -> Generator[Tuple[str, str], None, None]: + """ + Return a generator of all files included in the MF environment. + + Returns + ------- + Generator[Tuple[str, str], None, None] + A generator of all files included in the MF environment. The first element of + the tuple is the path to the file in the filesystem; the second element is the + path in the archive. + """ + return self._files.items() + + def contents(self) -> Generator[Tuple[bytes, str], None, None]: + """ + Return a generator of all special files included in the MF environment. + + Returns + ------- + Generator[Tuple[bytes, str], None, None] + A generator of all special files included in the MF environment. The first + element of the tuple is the content to add; the second element is path in the + archive. 
+ """ + for name, content in self._content.items(): + yield content, os.path.join(MFENV_DIR, name.value) + + def _module_files( + self, name: str, paths: Set[str] + ) -> Generator[Tuple[str, str], None, None]: + debug.package_exec( + f" Looking for distributions for module {name} in {paths}" + ) + paths = set(paths) # Do not modify external paths + has_init = False + distributions = modules_to_distributions().get(name) + prefix = f"{name}/" + init_file = f"{prefix}__init__.py" + + seen_distributions = set() + if distributions: + for dist in distributions: + dist_name = dist.metadata["Name"] # dist.name not always present + if dist_name in seen_distributions: + continue + # For some reason, sometimes the same distribution appears twice. We + # don't need to process twice. + seen_distributions.add(dist_name) + debug.package_exec( + f" Including distribution {dist_name} for module {name}" + ) + dist_root = dist.locate_file(name) + if dist_root not in paths: + # This is an error because it means that this distribution is + # not contributing to the module. + raise RuntimeError( + f"Distribution '{dist.metadata['Name']}' is not " + "contributing to module '{name}' as expected." + ) + paths.discard(dist_root) + if dist_name not in self._metainfo: + # Possible that a distribution contributes to multiple modules + self._metainfo[dist_name] = { + # We can add more if needed but these are likely the most + # useful (captures, name, version, etc and files which can + # be used to find non-python files in the distribution). 
+ "METADATA": dist.read_text("METADATA"), + "RECORD": dist.read_text("RECORD"), + } + for file in dist.files or []: + # Skip files that do not belong to this module (distribution may + # provide multiple modules) + if not file.startswith(prefix): + continue + if file == init_file: + has_init = True + yield str(dist.locate(file).resolve().as_posix()), os.path.join( + MFENV_DIR, str(file) + ) + + # Now if there are more paths left in paths, it means there is a non-distribution + # component to this package which we also include. + debug.package_exec( + f" Looking for non-distribution files for module {name} in {paths}" + ) + for path in paths: + if not Path(path).is_dir(): + # Single file for the module -- this will be something like .py + yield path, os.path.join(MFENV_DIR, os.path.basename(path)) + else: + for root, _, files in os.walk(path): + for file in files: + if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES): + continue + rel_path = os.path.relpath(os.path.join(root, file), path) + if rel_path == "__init__.py": + has_init = True + yield os.path.join(root, file), os.path.join( + MFENV_DIR, + name, + rel_path, + ) + # We now include an empty __init__.py file to close the module and prevent + # leaks from possible namespace packages + if not has_init: + yield os.path.join( + self._metaflow_root, "metaflow", "extension_support", "_empty_file.py" + ), os.path.join(MFENV_DIR, name, "__init__.py") + + def _metaflow_distribution_files(self) -> Generator[Tuple[str, str], None, None]: + debug.package_exec( + f" Including Metaflow from {self._metaflow_root} to the MF Environment" + ) + for path_tuple in self.walk( + os.path.join(self._metaflow_root, "metaflow"), + exclude_hidden=False, + suffixes=self.METAFLOW_SUFFIXES_LIST, + ): + yield path_tuple[0], os.path.join(MFENV_DIR, path_tuple[1]) + + def _metaflow_extension_files(self) -> Generator[Tuple[str, str], None, None]: + # Metaflow extensions; for now, we package *all* extensions but this may change + # at a 
later date; it is possible to call `package_mfext_package` instead of + # `package_mfext_all` but in that case, make sure to also add a + # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions + # package and prevent other extensions from being loaded that may be + # present in the rest of the system + for path_tuple in package_mfext_all(): + yield path_tuple[0], os.path.join(MFENV_DIR, path_tuple[1]) diff --git a/metaflow/plugins/package_cli.py b/metaflow/plugins/package_cli.py index 2e6519f862f..418e03ac68a 100644 --- a/metaflow/plugins/package_cli.py +++ b/metaflow/plugins/package_cli.py @@ -38,14 +38,24 @@ def info(obj): @package.command(help="List files included in the code package.") +@click.option( + "--archive/--no-archive", + default=False, + help="If True, lists the file paths as present in the tarball. " + "If False, lists the files on the filesystem.", + show_default=True, +) @click.pass_obj -def list(obj): +def list(obj, archive=False): obj.echo( "Files included in the code package " "(change with --package-suffixes):", fg="magenta", bold=False, ) - obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples())) + if archive: + obj.echo_always("\n".join(path for _, path in obj.package.path_tuples())) + else: + obj.echo_always("\n".join(path for path, _ in obj.package.path_tuples())) @package.command(help="Save the current code package in a tar file") diff --git a/metaflow/plugins/pypi/conda_decorator.py b/metaflow/plugins/pypi/conda_decorator.py index b1b7ee833d9..5396ddf8788 100644 --- a/metaflow/plugins/pypi/conda_decorator.py +++ b/metaflow/plugins/pypi/conda_decorator.py @@ -10,10 +10,10 @@ from metaflow.extension_support import EXT_PKG from metaflow.metadata_provider import MetaDatum from metaflow.metaflow_environment import InvalidEnvironmentException +from metaflow.package.mfenv import MFEnv +from metaflow.special_files import SpecialFile from metaflow.util import get_metaflow_root -from ...info_file import 
INFO_FILE - class CondaStepDecorator(StepDecorator): """ @@ -159,11 +159,11 @@ def runtime_init(self, flow, graph, package, run_id): os.path.join(self.metaflow_dir.name, "metaflow"), ) - info = os.path.join(get_metaflow_root(), os.path.basename(INFO_FILE)) + info = MFEnv.get_filename(SpecialFile.INFO_FILE) # Symlink the INFO file as well to properly propagate down the Metaflow version - if os.path.isfile(info): + if info: os.symlink( - info, os.path.join(self.metaflow_dir.name, os.path.basename(INFO_FILE)) + info, os.path.join(self.metaflow_dir.name, os.path.basename(info)) ) else: # If there is no info file, we will actually create one in this new @@ -173,7 +173,10 @@ def runtime_init(self, flow, graph, package, run_id): # EXT_PKG extensions are PYTHONPATH extensions. Instead of re-resolving, # we use the resolved information that is written out to the INFO file. with open( - os.path.join(self.metaflow_dir.name, os.path.basename(INFO_FILE)), + os.path.join( + self.metaflow_dir.name, + os.path.basename(SpecialFile.INFO_FILE.value), + ), mode="wt", encoding="utf-8", ) as f: diff --git a/metaflow/special_files.py b/metaflow/special_files.py new file mode 100644 index 00000000000..bea74b2e210 --- /dev/null +++ b/metaflow/special_files.py @@ -0,0 +1,41 @@ +import json +import os + +from enum import Enum + +from .util import get_metaflow_root + +_info_file_content = None +_info_file_present = None + + +# Ideally these would be in package/mfenv.py but that would cause imports to fail so +# moving here. The reason is that this is needed to read extension information which needs +# to happen before mfenv gets packaged. 
+ +MFENV_DIR = ".mfenv" + + +class SpecialFile(Enum): + INFO_FILE = "INFO" + CONFIG_FILE = "CONFIG_PARAMETERS" + + +def read_info_file(): + + global _info_file_content + global _info_file_present + if _info_file_present is None: + file_path = os.path.join( + get_metaflow_root(), MFENV_DIR, SpecialFile.INFO_FILE.value + ) + if os.path.exists(file_path): + with open(file_path, "r") as f: + _info_file_content = json.load(f) + _info_file_present = True + else: + _info_file_present = False + + if _info_file_present: + return _info_file_content + return None diff --git a/metaflow/user_configs/config_options.py b/metaflow/user_configs/config_options.py index 2453c427aea..62f67224a0d 100644 --- a/metaflow/user_configs/config_options.py +++ b/metaflow/user_configs/config_options.py @@ -7,9 +7,10 @@ from metaflow._vendor import click from metaflow.debug import debug -from .config_parameters import CONFIG_FILE, ConfigValue +from .config_parameters import ConfigValue from ..exception import MetaflowException, MetaflowInternalError from ..parameters import DeployTimeField, ParameterContext, current_flow +from ..special_files import SpecialFile from ..util import get_username @@ -24,7 +25,7 @@ def _load_config_values(info_file: Optional[str] = None) -> Optional[Dict[Any, Any]]: if info_file is None: - info_file = os.path.basename(CONFIG_FILE) + info_file = os.path.basename(SpecialFile.CONFIG_FILE.value) try: with open(info_file, encoding="utf-8") as contents: return json.load(contents).get("user_configs", {}) diff --git a/metaflow/user_configs/config_parameters.py b/metaflow/user_configs/config_parameters.py index a430285c9d5..e5af4a82852 100644 --- a/metaflow/user_configs/config_parameters.py +++ b/metaflow/user_configs/config_parameters.py @@ -36,10 +36,6 @@ # return tracefunc_closure -CONFIG_FILE = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "CONFIG_PARAMETERS" -) - ID_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") UNPACK_KEY = "_unpacked_delayed_" diff 
--git a/metaflow/util.py b/metaflow/util.py index f9051aff589..de3c096249f 100644 --- a/metaflow/util.py +++ b/metaflow/util.py @@ -9,7 +9,6 @@ from itertools import takewhile import re -from metaflow.exception import MetaflowUnknownUser, MetaflowInternalError try: # python2 @@ -162,6 +161,8 @@ def get_username(): def resolve_identity_as_tuple(): + from metaflow.exception import MetaflowUnknownUser + prod_token = os.environ.get("METAFLOW_PRODUCTION_TOKEN") if prod_token: return "production", prod_token @@ -236,6 +237,8 @@ class of the given object. def compress_list(lst, separator=",", rangedelim=":", zlibmarker="!", zlibmin=500): + from metaflow.exception import MetaflowInternalError + bad_items = [x for x in lst if separator in x or rangedelim in x or zlibmarker in x] if bad_items: raise MetaflowInternalError( From 789e84118c5083e6712439af93786afb7af76087 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Thu, 6 Mar 2025 00:28:00 -0800 Subject: [PATCH 2/3] Basic packaging works with Kubernetes (Conda still needs to be fixed to use correct file) --- metaflow/cli.py | 7 ++ metaflow/client/core.py | 4 +- metaflow/extension_support/__init__.py | 2 +- metaflow/meta_files.py | 58 +++++++++++ metaflow/metaflow_environment.py | 3 + metaflow/metaflow_version.py | 2 +- metaflow/package/__init__.py | 16 ++-- metaflow/package/mfenv.py | 117 ++++++++++++++++------- metaflow/plugins/pypi/conda_decorator.py | 6 +- metaflow/special_files.py | 41 -------- metaflow/user_configs/config_options.py | 4 +- 11 files changed, 169 insertions(+), 91 deletions(-) create mode 100644 metaflow/meta_files.py delete mode 100644 metaflow/special_files.py diff --git a/metaflow/cli.py b/metaflow/cli.py index 1fa0c729f88..2d44f4c103a 100644 --- a/metaflow/cli.py +++ b/metaflow/cli.py @@ -15,6 +15,7 @@ from .exception import CommandException, MetaflowException from .flowspec import _FlowState from .graph import FlowGraph +from .meta_files import read_included_dist_info from .metaflow_config import ( 
DEFAULT_DATASTORE, DEFAULT_ENVIRONMENT, @@ -26,6 +27,7 @@ from .metaflow_current import current from metaflow.system import _system_monitor, _system_logger from .metaflow_environment import MetaflowEnvironment +from .package.mfenv import PackagedDistributionFinder from .plugins import ( DATASTORES, ENVIRONMENTS, @@ -325,6 +327,11 @@ def start( echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False) echo(" for *%s*" % resolve_identity(), fg="magenta") + # Check if we need to set up the distribution finder (only needed when included distribution information is present) + dist_info = read_included_dist_info() + if dist_info: + sys.meta_path.append(PackagedDistributionFinder(dist_info)) + # Setup the context cli_args._set_top_kwargs(ctx.params) ctx.obj.echo = echo diff --git a/metaflow/client/core.py b/metaflow/client/core.py index 4d3ea0676f8..29d9d23f8c8 100644 --- a/metaflow/client/core.py +++ b/metaflow/client/core.py @@ -34,7 +34,7 @@ from metaflow.metaflow_environment import MetaflowEnvironment from metaflow.package.mfenv import MFEnv from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS -from metaflow.special_files import SpecialFile +from metaflow.meta_files import MetaFile from metaflow.unbounded_foreach import CONTROL_TASK_TAG from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode @@ -825,7 +825,7 @@ def __init__(self, flow_name: str, code_package: str): ) code_obj = BytesIO(blobdata) self._tar = tarfile.open(fileobj=code_obj, mode="r:gz") - info_str = MFEnv.get_archive_content(self._tar, SpecialFile.INFO_FILE) + info_str = MFEnv.get_archive_content(self._tar, MetaFile.INFO_FILE) self._info = json.loads(info_str) self._flowspec = self._tar.extractfile(self._info["script"]).read() diff --git a/metaflow/extension_support/__init__.py b/metaflow/extension_support/__init__.py index 5de8de66929..f1f25b7777e 100644 --- a/metaflow/extension_support/__init__.py +++ b/metaflow/extension_support/__init__.py @@ -12,7 +12,7 @@ from itertools import chain from pathlib 
import Path -from metaflow.special_files import read_info_file +from metaflow.meta_files import read_info_file # diff --git a/metaflow/meta_files.py b/metaflow/meta_files.py new file mode 100644 index 00000000000..b14c1c68214 --- /dev/null +++ b/metaflow/meta_files.py @@ -0,0 +1,58 @@ +import json +import os + +from enum import Enum + +from .util import get_metaflow_root + +_info_file_content = None +_info_file_present = None +_included_dist_info = None +_included_dist_present = None + +# Ideally these would be in package/mfenv.py but that would cause imports to fail so +# moving here. The reason is that this is needed to read extension information which needs +# to happen before mfenv gets packaged. + +MFENV_DIR = ".mfenv" + + +class MetaFile(Enum): + INFO_FILE = "INFO" + CONFIG_FILE = "CONFIG_PARAMETERS" + INCLUDED_DIST_INFO = "INCLUDED_DIST_INFO" + + +def read_info_file(): + + global _info_file_content + global _info_file_present + if _info_file_present is None: + file_path = os.path.join(get_metaflow_root(), MetaFile.INFO_FILE.value) + if os.path.exists(file_path): + with open(file_path, "r", encoding="utf-8") as f: + _info_file_content = json.load(f) + _info_file_present = True + else: + _info_file_present = False + + if _info_file_present: + return _info_file_content + return None + + +def read_included_dist_info(): + global _included_dist_info + global _included_dist_present + if _included_dist_present is None: + file_path = os.path.join(get_metaflow_root(), MetaFile.INCLUDED_DIST_INFO.value) + if os.path.exists(file_path): + with open(file_path, "r", encoding="utf-8") as f: + _included_dist_info = json.load(f) + _included_dist_present = True + else: + _included_dist_present = False + + if _included_dist_present: + return _included_dist_info + return None diff --git a/metaflow/metaflow_environment.py b/metaflow/metaflow_environment.py index dde7be0b9fe..d6084c06c3a 100644 --- a/metaflow/metaflow_environment.py +++ b/metaflow/metaflow_environment.py @@ -7,6 
+7,8 @@ from metaflow.exception import MetaflowException from metaflow.extension_support import dump_module_info from metaflow.mflog import BASH_MFLOG, BASH_FLUSH_LOGS + +from .meta_files import MFENV_DIR from . import R @@ -176,6 +178,7 @@ def get_package_commands(self, code_package_url, datastore_type): "after 6 tries. Exiting...' && exit 1; " "fi" % code_package_url, "TAR_OPTIONS='--warning=no-timestamp' tar xf job.tar", + "export PYTHONPATH=`pwd`/%s:$PYTHONPATH" % MFENV_DIR, "mflog 'Task is starting.'", "flush_mflogs", ] diff --git a/metaflow/metaflow_version.py b/metaflow/metaflow_version.py index 92dab210bdf..e0857f4a807 100644 --- a/metaflow/metaflow_version.py +++ b/metaflow/metaflow_version.py @@ -11,7 +11,7 @@ from os import path, name, environ, listdir from metaflow.extension_support import update_package_info -from metaflow.special_files import read_info_file +from metaflow.meta_files import read_info_file # True/False correspond to the value `public`` in get_version diff --git a/metaflow/package/__init__.py b/metaflow/package/__init__.py index 74c763bd536..b4430633d8e 100644 --- a/metaflow/package/__init__.py +++ b/metaflow/package/__init__.py @@ -8,7 +8,7 @@ from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES from ..exception import MetaflowException -from ..special_files import SpecialFile +from ..meta_files import MetaFile from ..user_configs.config_parameters import dump_config_values from .. 
import R @@ -46,16 +46,16 @@ def __init__(self, flow, environment, echo, suffixes=DEFAULT_SUFFIXES_LIST): self._code_env = MFEnv(lambda x: hasattr(x, "METAFLOW_PACKAGE")) - # Add special content - self._code_env.add_special_content( - SpecialFile.INFO_FILE, + # Add metacontent + self._code_env.add_meta_content( + MetaFile.INFO_FILE, json.dumps( self.environment.get_environment_info(include_ext_info=True) ).encode("utf-8"), ) - self._code_env.add_special_content( - SpecialFile.CONFIG_FILE, + self._code_env.add_meta_content( + MetaFile.CONFIG_FILE, json.dumps(dump_config_values(self._flow)).encode("utf-8"), ) @@ -68,7 +68,7 @@ def path_tuples(self): # Package the environment for path, arcname in self._code_env.files(): yield path, arcname - for _, arcname in self._code_env.contents(): + for _, arcname in self._code_env.metacontents(): yield f"{arcname}", arcname # Package the user code @@ -139,7 +139,7 @@ def no_mtime(tarinfo): # Package the environment for path, arcname in self._code_env.files(): tar.add(path, arcname=arcname, recursive=False, filter=no_mtime) - for content, arcname in self._code_env.contents(): + for content, arcname in self._code_env.metacontents(): self._add_file(tar, arcname, BytesIO(content)) # Package the user code diff --git a/metaflow/package/mfenv.py b/metaflow/package/mfenv.py index 4e08517302c..6c5ad8c6527 100644 --- a/metaflow/package/mfenv.py +++ b/metaflow/package/mfenv.py @@ -1,5 +1,7 @@ import inspect +import json import os +import re import sys import tarfile @@ -26,7 +28,7 @@ from ..debug import debug from ..extension_support import EXT_EXCLUDE_SUFFIXES, metadata, package_mfext_all -from ..special_files import MFENV_DIR, SpecialFile +from ..meta_files import MFENV_DIR, MetaFile from ..util import get_metaflow_root, to_unicode packages_distributions = None @@ -80,6 +82,8 @@ def importable_name(name): _cached_distributions = None +name_normalizer = re.compile(r"[-_.]+") + def modules_to_distributions() -> Dict[str, 
List[metadata.Distribution]]: """ @@ -106,6 +110,50 @@ class _ModuleInfo: module: ModuleType +class PackagedDistribution(metadata.Distribution): + """ + A Python Package packaged within a MFEnv. This allows users to use importlib + as they would regularly and the packaged Python Package would be considered as a + distribution even if it really isn't (since it is just included in the PythonPath). + """ + + def __init__(self, root: str, content: Dict[str, str]): + self._root = Path(root) + self._content = content + + # Strongly inspired from PathDistribution in metadata.py + def read_text(self, filename: Union[str, os.PathLike[str]]) -> Optional[str]: + if str(filename) in self._content: + return self._content[str(filename)] + return None + + read_text.__doc__ = metadata.Distribution.read_text.__doc__ + + def locate_file(self, path: Union[str, os.PathLike[str]]) -> metadata.SimplePath: + return self._root / path + + +class PackagedDistributionFinder(metadata.DistributionFinder): + + def __init__(self, dist_info: Dict[str, Dict[str, str]]): + self._dist_info = dist_info + + def find_distributions(self, context=metadata.DistributionFinder.Context()): + if context.name is None: + # Yields all known distributions + for name, info in self._dist_info.items(): + yield PackagedDistribution( + os.path.join(get_metaflow_root(), name), info + ) + name = name_normalizer.sub("-", context.name).lower() + if name in self._dist_info: + yield PackagedDistribution( + os.path.join(get_metaflow_root(), context.name), + self._dist_info[name], + ) + return None + + class MFEnv: METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"] @@ -172,20 +220,17 @@ def walk( yield p, p[prefixlen:] @classmethod - def get_filename(cls, name: Union[SpecialFile, str]) -> Optional[str]: - # In all cases, the special files are siblings of the metaflow root - # directory. 
- if isinstance(name, SpecialFile): - r = get_metaflow_root() - path_to_file = os.path.join(r, name.value) - else: - path_to_file = os.path.join(MFENV_DIR, name) + def get_filename(cls, name: Union[MetaFile, str]) -> Optional[str]: + # Get the filename of the expanded file -- it will always be expanded next to + # metaflow_root which is already in MFENV_DIR. + real_name = name.value if isinstance(name, MetaFile) else name + path_to_file = os.path.join(get_metaflow_root(), real_name) if os.path.isfile(path_to_file): return path_to_file return None @classmethod - def get_content(cls, name: Union[SpecialFile, str]) -> Optional[str]: + def get_content(cls, name: Union[MetaFile, str]) -> Optional[str]: file_to_read = cls.get_filename(name) if file_to_read: with open(file_to_read, "r", encoding="utf-8") as f: @@ -194,11 +239,11 @@ def get_content(cls, name: Union[SpecialFile, str]) -> Optional[str]: @classmethod def get_archive_filename( - cls, archive: tarfile.TarFile, name: Union[SpecialFile, str] + cls, archive: tarfile.TarFile, name: Union[MetaFile, str] ) -> Optional[str]: # Backward compatible way of accessing all special files. Prior to MFEnv, they # were stored at the TL of the archive. - real_name = name.value if isinstance(name, SpecialFile) else name + real_name = name.value if isinstance(name, MetaFile) else name if archive.getmember(MFENV_DIR): file_path = os.path.join(MFENV_DIR, real_name) else: @@ -209,7 +254,7 @@ def get_archive_filename( @classmethod def get_archive_content( - cls, archive: tarfile.TarFile, name: Union[SpecialFile, str] + cls, archive: tarfile.TarFile, name: Union[MetaFile, str] ) -> Optional[str]: file_to_read = cls.get_archive_filename(archive, name) if file_to_read: @@ -240,13 +285,13 @@ def __init__(self, criteria: Callable[[ModuleType], bool]) -> None: # Contain metadata information regarding the distributions packaged. 
# This allows Metaflow to "fake" distribution information when packaged - self._metainfo = {} # type: Dict[str, Dict[str, str]] + self._distmetainfo = {} # type: Dict[str, Dict[str, str]] # Maps an absolute path on the filesystem to the path of the file in the # archive. self._files = {} # type: Dict[str, str] - self._content = {} # type: Dict[SpecialFile, bytes] + self._metacontent = {} # type: Dict[MetaFile, bytes] debug.package_exec(f"Used system modules found: {str(self._modules)}") @@ -264,21 +309,21 @@ def __init__(self, criteria: Callable[[ModuleType], bool]) -> None: def root_dir(self): return MFENV_DIR - def add_special_content(self, name: SpecialFile, content: bytes) -> None: + def add_meta_content(self, name: MetaFile, content: bytes) -> None: """ - Add a special file to the MF environment. + Add a metafile to the MF environment. This file will be included in the resulting code package in `MFENV_DIR`. Parameters ---------- - name : SpecialFile - The special file to add to the MF environment + name : MetaFile + The metafile to add to the MF environment content : bytes - The content of the special file + The content of the metafile """ debug.package_exec(f"Adding special content {name.value} to the MF environment") - self._content[name] = content + self._metacontent[name] = content def add_module(self, module: ModuleType) -> None: """ @@ -394,19 +439,24 @@ def files(self) -> Generator[Tuple[str, str], None, None]: """ return self._files.items() - def contents(self) -> Generator[Tuple[bytes, str], None, None]: + def metacontents(self) -> Generator[Tuple[bytes, str], None, None]: """ - Return a generator of all special files included in the MF environment. + Return a generator of all metafiles included in the MF environment. Returns ------- Generator[Tuple[bytes, str], None, None] - A generator of all special files included in the MF environment. The first + A generator of all metafiles included in the MF environment. 
The first element of the tuple is the content to add; the second element is path in the archive. """ - for name, content in self._content.items(): + for name, content in self._metacontent.items(): yield content, os.path.join(MFENV_DIR, name.value) + if self._distmetainfo: + yield ( + json.dumps(self._distmetainfo).encode("utf-8"), + os.path.join(MFENV_DIR, MetaFile.INCLUDED_DIST_INFO.value), + ) def _module_files( self, name: str, paths: Set[str] @@ -432,18 +482,19 @@ def _module_files( debug.package_exec( f" Including distribution {dist_name} for module {name}" ) - dist_root = dist.locate_file(name) + dist_root = str(dist.locate_file(name)) if dist_root not in paths: # This is an error because it means that this distribution is # not contributing to the module. raise RuntimeError( f"Distribution '{dist.metadata['Name']}' is not " - "contributing to module '{name}' as expected." + f"contributing to module '{name}' as expected (got '{dist_root}' " + f"when expected one of {paths})" ) paths.discard(dist_root) - if dist_name not in self._metainfo: + if dist_name not in self._distmetainfo: # Possible that a distribution contributes to multiple modules - self._metainfo[dist_name] = { + self._distmetainfo[dist_name] = { # We can add more if needed but these are likely the most # useful (captures, name, version, etc and files which can # be used to find non-python files in the distribution). 
@@ -453,13 +504,13 @@ def _module_files( for file in dist.files or []: # Skip files that do not belong to this module (distribution may # provide multiple modules) - if not file.startswith(prefix): + if file.parts[0] != name: continue if file == init_file: has_init = True - yield str(dist.locate(file).resolve().as_posix()), os.path.join( - MFENV_DIR, str(file) - ) + yield str( + dist.locate_file(file).resolve().as_posix() + ), os.path.join(MFENV_DIR, str(file)) # Now if there are more paths left in paths, it means there is a non-distribution # component to this package which we also include. diff --git a/metaflow/plugins/pypi/conda_decorator.py b/metaflow/plugins/pypi/conda_decorator.py index 5396ddf8788..3baa3e25357 100644 --- a/metaflow/plugins/pypi/conda_decorator.py +++ b/metaflow/plugins/pypi/conda_decorator.py @@ -11,7 +11,7 @@ from metaflow.metadata_provider import MetaDatum from metaflow.metaflow_environment import InvalidEnvironmentException from metaflow.package.mfenv import MFEnv -from metaflow.special_files import SpecialFile +from metaflow.meta_files import MetaFile from metaflow.util import get_metaflow_root @@ -159,7 +159,7 @@ def runtime_init(self, flow, graph, package, run_id): os.path.join(self.metaflow_dir.name, "metaflow"), ) - info = MFEnv.get_filename(SpecialFile.INFO_FILE) + info = MFEnv.get_filename(MetaFile.INFO_FILE) # Symlink the INFO file as well to properly propagate down the Metaflow version if info: os.symlink( @@ -175,7 +175,7 @@ def runtime_init(self, flow, graph, package, run_id): with open( os.path.join( self.metaflow_dir.name, - os.path.basename(SpecialFile.INFO_FILE.value), + os.path.basename(MetaFile.INFO_FILE.value), ), mode="wt", encoding="utf-8", diff --git a/metaflow/special_files.py b/metaflow/special_files.py deleted file mode 100644 index bea74b2e210..00000000000 --- a/metaflow/special_files.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os - -from enum import Enum - -from .util import get_metaflow_root - 
-_info_file_content = None -_info_file_present = None - - -# Ideally these would be in package/mfenv.py but that would cause imports to fail so -# moving here. The reason is that this is needed to read extension information which needs -# to happen before mfenv gets packaged. - -MFENV_DIR = ".mfenv" - - -class SpecialFile(Enum): - INFO_FILE = "INFO" - CONFIG_FILE = "CONFIG_PARAMETERS" - - -def read_info_file(): - - global _info_file_content - global _info_file_present - if _info_file_present is None: - file_path = os.path.join( - get_metaflow_root(), MFENV_DIR, SpecialFile.INFO_FILE.value - ) - if os.path.exists(file_path): - with open(file_path, "r") as f: - _info_file_content = json.load(f) - _info_file_present = True - else: - _info_file_present = False - - if _info_file_present: - return _info_file_content - return None diff --git a/metaflow/user_configs/config_options.py b/metaflow/user_configs/config_options.py index 62f67224a0d..12556dc4753 100644 --- a/metaflow/user_configs/config_options.py +++ b/metaflow/user_configs/config_options.py @@ -10,7 +10,7 @@ from .config_parameters import ConfigValue from ..exception import MetaflowException, MetaflowInternalError from ..parameters import DeployTimeField, ParameterContext, current_flow -from ..special_files import SpecialFile +from ..meta_files import MetaFile from ..util import get_username @@ -25,7 +25,7 @@ def _load_config_values(info_file: Optional[str] = None) -> Optional[Dict[Any, Any]]: if info_file is None: - info_file = os.path.basename(SpecialFile.CONFIG_FILE.value) + info_file = os.path.basename(MetaFile.CONFIG_FILE.value) try: with open(info_file, encoding="utf-8") as contents: return json.load(contents).get("user_configs", {}) From ad61eac148fa40a51c5311806742295a79200009 Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Fri, 7 Mar 2025 11:33:26 -0800 Subject: [PATCH 3/3] Removed 3.5 vendoring; fixed conda; hopefully fixed tests --- metaflow/_vendor/v3_5/__init__.py | 1 - 
.../_vendor/v3_5/importlib_metadata.LICENSE | 13 - .../v3_5/importlib_metadata/__init__.py | 644 ------------------ .../v3_5/importlib_metadata/_compat.py | 152 ----- metaflow/_vendor/v3_5/zipp.LICENSE | 19 - metaflow/_vendor/v3_5/zipp.py | 329 --------- metaflow/_vendor/vendor_v3_5.txt | 2 - metaflow/cmd/develop/stubs.py | 34 +- metaflow/extension_support/__init__.py | 67 +- metaflow/package/mfenv.py | 14 +- metaflow/plugins/pypi/bootstrap.py | 4 +- metaflow/vendor.py | 1 - 12 files changed, 42 insertions(+), 1238 deletions(-) delete mode 100644 metaflow/_vendor/v3_5/__init__.py delete mode 100644 metaflow/_vendor/v3_5/importlib_metadata.LICENSE delete mode 100644 metaflow/_vendor/v3_5/importlib_metadata/__init__.py delete mode 100644 metaflow/_vendor/v3_5/importlib_metadata/_compat.py delete mode 100644 metaflow/_vendor/v3_5/zipp.LICENSE delete mode 100644 metaflow/_vendor/v3_5/zipp.py delete mode 100644 metaflow/_vendor/vendor_v3_5.txt diff --git a/metaflow/_vendor/v3_5/__init__.py b/metaflow/_vendor/v3_5/__init__.py deleted file mode 100644 index 22ae0c5f40e..00000000000 --- a/metaflow/_vendor/v3_5/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Empty file \ No newline at end of file diff --git a/metaflow/_vendor/v3_5/importlib_metadata.LICENSE b/metaflow/_vendor/v3_5/importlib_metadata.LICENSE deleted file mode 100644 index be7e092b0b0..00000000000 --- a/metaflow/_vendor/v3_5/importlib_metadata.LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2017-2019 Jason R. Coombs, Barry Warsaw - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. diff --git a/metaflow/_vendor/v3_5/importlib_metadata/__init__.py b/metaflow/_vendor/v3_5/importlib_metadata/__init__.py deleted file mode 100644 index 429bfa66c4f..00000000000 --- a/metaflow/_vendor/v3_5/importlib_metadata/__init__.py +++ /dev/null @@ -1,644 +0,0 @@ -from __future__ import unicode_literals, absolute_import - -import io -import os -import re -import abc -import csv -import sys -from metaflow._vendor.v3_5 import zipp -import operator -import functools -import itertools -import posixpath -import collections - -from ._compat import ( - install, - NullFinder, - ConfigParser, - suppress, - map, - FileNotFoundError, - IsADirectoryError, - NotADirectoryError, - PermissionError, - pathlib, - ModuleNotFoundError, - MetaPathFinder, - email_message_from_string, - PyPy_repr, - unique_ordered, - str, - ) -from importlib import import_module -from itertools import starmap - - -__metaclass__ = type - - -__all__ = [ - 'Distribution', - 'DistributionFinder', - 'PackageNotFoundError', - 'distribution', - 'distributions', - 'entry_points', - 'files', - 'metadata', - 'requires', - 'version', - ] - - -class PackageNotFoundError(ModuleNotFoundError): - """The package was not found.""" - - def __str__(self): - tmpl = "No package metadata was found for {self.name}" - return tmpl.format(**locals()) - - @property - def name(self): - name, = self.args - return name - - -class EntryPoint( - PyPy_repr, - collections.namedtuple('EntryPointBase', 'name value group')): - """An entry point as defined by Python packaging conventions. - - See `the packaging docs on entry points - <https://packaging.python.org/specifications/entry-points/>`_ - for more information. - """ - - pattern = re.compile( - r'(?P<module>[\w.]+)\s*' - r'(:\s*(?P<attr>[\w.]+)\s*)?' 
- r'((?P<extras>\[.*\])\s*)?$' - ) - """ - A regular expression describing the syntax for an entry point, - which might look like: - - - module - - package.module - - package.module:attribute - - package.module:object.attribute - - package.module:attr [extra1, extra2] - - Other combinations are possible as well. - - The expression is lenient about whitespace around the ':', - following the attr, and following any extras. - """ - - def load(self): - """Load the entry point from its definition. If only a module - is indicated by the value, return that module. Otherwise, - return the named object. - """ - match = self.pattern.match(self.value) - module = import_module(match.group('module')) - attrs = filter(None, (match.group('attr') or '').split('.')) - return functools.reduce(getattr, attrs, module) - - @property - def module(self): - match = self.pattern.match(self.value) - return match.group('module') - - @property - def attr(self): - match = self.pattern.match(self.value) - return match.group('attr') - - @property - def extras(self): - match = self.pattern.match(self.value) - return list(re.finditer(r'\w+', match.group('extras') or '')) - - @classmethod - def _from_config(cls, config): - return [ - cls(name, value, group) - for group in config.sections() - for name, value in config.items(group) - ] - - @classmethod - def _from_text(cls, text): - config = ConfigParser(delimiters='=') - # case sensitive: https://stackoverflow.com/q/1611799/812183 - config.optionxform = str - try: - config.read_string(text) - except AttributeError: # pragma: nocover - # Python 2 has no read_string - config.readfp(io.StringIO(text)) - return EntryPoint._from_config(config) - - def __iter__(self): - """ - Supply iter so one may construct dicts of EntryPoints easily. 
- """ - return iter((self.name, self)) - - def __reduce__(self): - return ( - self.__class__, - (self.name, self.value, self.group), - ) - - -class PackagePath(pathlib.PurePosixPath): - """A reference to a path in a package""" - - def read_text(self, encoding='utf-8'): - with self.locate().open(encoding=encoding) as stream: - return stream.read() - - def read_binary(self): - with self.locate().open('rb') as stream: - return stream.read() - - def locate(self): - """Return a path-like object for this path""" - return self.dist.locate_file(self) - - -class FileHash: - def __init__(self, spec): - self.mode, _, self.value = spec.partition('=') - - def __repr__(self): - return '<FileHash mode: {} value: {}>'.format(self.mode, self.value) - - -class Distribution: - """A Python distribution package.""" - - @abc.abstractmethod - def read_text(self, filename): - """Attempt to load metadata file given by the name. - - :param filename: The name of the file in the distribution info. - :return: The text if found, otherwise None. - """ - - @abc.abstractmethod - def locate_file(self, path): - """ - Given a path to a file in this distribution, return a path - to it. - """ - - @classmethod - def from_name(cls, name): - """Return the Distribution for the given package name. - - :param name: The name of the distribution package to search for. - :return: The Distribution instance (or subclass thereof) for the named - package, if found. - :raises PackageNotFoundError: When the named package's distribution - metadata cannot be found. - """ - for resolver in cls._discover_resolvers(): - dists = resolver(DistributionFinder.Context(name=name)) - dist = next(iter(dists), None) - if dist is not None: - return dist - else: - raise PackageNotFoundError(name) - - @classmethod - def discover(cls, **kwargs): - """Return an iterable of Distribution objects for all packages. - - Pass a ``context`` or pass keyword arguments for constructing - a context. - - :context: A ``DistributionFinder.Context`` object. 
- :return: Iterable of Distribution objects for all packages. - """ - context = kwargs.pop('context', None) - if context and kwargs: - raise ValueError("cannot accept context and kwargs") - context = context or DistributionFinder.Context(**kwargs) - return itertools.chain.from_iterable( - resolver(context) - for resolver in cls._discover_resolvers() - ) - - @staticmethod - def at(path): - """Return a Distribution for the indicated metadata path - - :param path: a string or path-like object - :return: a concrete Distribution instance for the path - """ - return PathDistribution(pathlib.Path(path)) - - @staticmethod - def _discover_resolvers(): - """Search the meta_path for resolvers.""" - declared = ( - getattr(finder, 'find_distributions', None) - for finder in sys.meta_path - ) - return filter(None, declared) - - @classmethod - def _local(cls, root='.'): - from pep517 import build, meta - system = build.compat_system(root) - builder = functools.partial( - meta.build, - source_dir=root, - system=system, - ) - return PathDistribution(zipp.Path(meta.build_as_zip(builder))) - - @property - def metadata(self): - """Return the parsed metadata for this Distribution. - - The returned object will have keys that name the various bits of - metadata. See PEP 566 for details. - """ - text = ( - self.read_text('METADATA') - or self.read_text('PKG-INFO') - # This last clause is here to support old egg-info files. Its - # effect is to just end up using the PathDistribution's self._path - # (which points to the egg-info file) attribute unchanged. - or self.read_text('') - ) - return email_message_from_string(text) - - @property - def version(self): - """Return the 'Version' metadata for the distribution package.""" - return self.metadata['Version'] - - @property - def entry_points(self): - return EntryPoint._from_text(self.read_text('entry_points.txt')) - - @property - def files(self): - """Files in this distribution. 
- - :return: List of PackagePath for this distribution or None - - Result is `None` if the metadata file that enumerates files - (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is - missing. - Result may be empty if the metadata exists but is empty. - """ - file_lines = self._read_files_distinfo() or self._read_files_egginfo() - - def make_file(name, hash=None, size_str=None): - result = PackagePath(name) - result.hash = FileHash(hash) if hash else None - result.size = int(size_str) if size_str else None - result.dist = self - return result - - return file_lines and list(starmap(make_file, csv.reader(file_lines))) - - def _read_files_distinfo(self): - """ - Read the lines of RECORD - """ - text = self.read_text('RECORD') - return text and text.splitlines() - - def _read_files_egginfo(self): - """ - SOURCES.txt might contain literal commas, so wrap each line - in quotes. - """ - text = self.read_text('SOURCES.txt') - return text and map('"{}"'.format, text.splitlines()) - - @property - def requires(self): - """Generated requirements specified for this Distribution""" - reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() - return reqs and list(reqs) - - def _read_dist_info_reqs(self): - return self.metadata.get_all('Requires-Dist') - - def _read_egg_info_reqs(self): - source = self.read_text('requires.txt') - return source and self._deps_from_requires_text(source) - - @classmethod - def _deps_from_requires_text(cls, source): - section_pairs = cls._read_sections(source.splitlines()) - sections = { - section: list(map(operator.itemgetter('line'), results)) - for section, results in - itertools.groupby(section_pairs, operator.itemgetter('section')) - } - return cls._convert_egg_info_reqs_to_simple_reqs(sections) - - @staticmethod - def _read_sections(lines): - section = None - for line in filter(None, lines): - section_match = re.match(r'\[(.*)\]$', line) - if section_match: - section = section_match.group(1) - continue - yield locals() - - 
@staticmethod - def _convert_egg_info_reqs_to_simple_reqs(sections): - """ - Historically, setuptools would solicit and store 'extra' - requirements, including those with environment markers, - in separate sections. More modern tools expect each - dependency to be defined separately, with any relevant - extras and environment markers attached directly to that - requirement. This method converts the former to the - latter. See _test_deps_from_requires_text for an example. - """ - def make_condition(name): - return name and 'extra == "{name}"'.format(name=name) - - def parse_condition(section): - section = section or '' - extra, sep, markers = section.partition(':') - if extra and markers: - markers = '({markers})'.format(markers=markers) - conditions = list(filter(None, [markers, make_condition(extra)])) - return '; ' + ' and '.join(conditions) if conditions else '' - - for section, deps in sections.items(): - for dep in deps: - yield dep + parse_condition(section) - - -class DistributionFinder(MetaPathFinder): - """ - A MetaPathFinder capable of discovering installed distributions. - """ - - class Context: - """ - Keyword arguments presented by the caller to - ``distributions()`` or ``Distribution.discover()`` - to narrow the scope of a search for distributions - in all DistributionFinders. - - Each DistributionFinder may expect any parameters - and should attempt to honor the canonical - parameters defined below when appropriate. - """ - - name = None - """ - Specific name for which a distribution finder should match. - A name of ``None`` matches all distributions. - """ - - def __init__(self, **kwargs): - vars(self).update(kwargs) - - @property - def path(self): - """ - The path that a distribution finder should search. - - Typically refers to Python package paths and defaults - to ``sys.path``. - """ - return vars(self).get('path', sys.path) - - @abc.abstractmethod - def find_distributions(self, context=Context()): - """ - Find distributions. 
- - Return an iterable of all Distribution instances capable of - loading the metadata for packages matching the ``context``, - a DistributionFinder.Context instance. - """ - - -class FastPath: - """ - Micro-optimized class for searching a path for - children. - """ - - def __init__(self, root): - self.root = str(root) - self.base = os.path.basename(self.root).lower() - - def joinpath(self, child): - return pathlib.Path(self.root, child) - - def children(self): - with suppress(Exception): - return os.listdir(self.root or '.') - with suppress(Exception): - return self.zip_children() - return [] - - def zip_children(self): - zip_path = zipp.Path(self.root) - names = zip_path.root.namelist() - self.joinpath = zip_path.joinpath - - return unique_ordered( - child.split(posixpath.sep, 1)[0] - for child in names - ) - - def search(self, name): - return ( - self.joinpath(child) - for child in self.children() - if name.matches(child, self.base) - ) - - -class Prepared: - """ - A prepared search for metadata on a possibly-named package. - """ - normalized = None - suffixes = '.dist-info', '.egg-info' - exact_matches = [''][:0] - - def __init__(self, name): - self.name = name - if name is None: - return - self.normalized = self.normalize(name) - self.exact_matches = [ - self.normalized + suffix for suffix in self.suffixes] - - @staticmethod - def normalize(name): - """ - PEP 503 normalization plus dashes as underscores. - """ - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') - - @staticmethod - def legacy_normalize(name): - """ - Normalize the package name as found in the convention in - older packaging tools versions and specs. 
- """ - return name.lower().replace('-', '_') - - def matches(self, cand, base): - low = cand.lower() - pre, ext = os.path.splitext(low) - name, sep, rest = pre.partition('-') - return ( - low in self.exact_matches - or ext in self.suffixes and ( - not self.normalized or - name.replace('.', '_') == self.normalized - ) - # legacy case: - or self.is_egg(base) and low == 'egg-info' - ) - - def is_egg(self, base): - normalized = self.legacy_normalize(self.name or '') - prefix = normalized + '-' if normalized else '' - versionless_egg_name = normalized + '.egg' if self.name else '' - return ( - base == versionless_egg_name - or base.startswith(prefix) - and base.endswith('.egg')) - - -@install -class MetadataPathFinder(NullFinder, DistributionFinder): - """A degenerate finder for distribution packages on the file system. - - This finder supplies only a find_distributions() method for versions - of Python that do not have a PathFinder find_distributions(). - """ - - def find_distributions(self, context=DistributionFinder.Context()): - """ - Find distributions. - - Return an iterable of all Distribution instances capable of - loading the metadata for packages matching ``context.name`` - (or all names if ``None`` indicated) along the paths in the list - of directories ``context.path``. - """ - found = self._search_paths(context.name, context.path) - return map(PathDistribution, found) - - @classmethod - def _search_paths(cls, name, paths): - """Find metadata directories in paths heuristically.""" - return itertools.chain.from_iterable( - path.search(Prepared(name)) - for path in map(FastPath, paths) - ) - - -class PathDistribution(Distribution): - def __init__(self, path): - """Construct a distribution from a path to the metadata directory. - - :param path: A pathlib.Path or similar object supporting - .joinpath(), __div__, .parent, and .read_text(). 
- """ - self._path = path - - def read_text(self, filename): - with suppress(FileNotFoundError, IsADirectoryError, KeyError, - NotADirectoryError, PermissionError): - return self._path.joinpath(filename).read_text(encoding='utf-8') - read_text.__doc__ = Distribution.read_text.__doc__ - - def locate_file(self, path): - return self._path.parent / path - - -def distribution(distribution_name): - """Get the ``Distribution`` instance for the named package. - - :param distribution_name: The name of the distribution package as a string. - :return: A ``Distribution`` instance (or subclass thereof). - """ - return Distribution.from_name(distribution_name) - - -def distributions(**kwargs): - """Get all ``Distribution`` instances in the current environment. - - :return: An iterable of ``Distribution`` instances. - """ - return Distribution.discover(**kwargs) - - -def metadata(distribution_name): - """Get the metadata for the named package. - - :param distribution_name: The name of the distribution package to query. - :return: An email.Message containing the parsed metadata. - """ - return Distribution.from_name(distribution_name).metadata - - -def version(distribution_name): - """Get the version string for the named package. - - :param distribution_name: The name of the distribution package to query. - :return: The version string for the package as defined in the package's - "Version" metadata key. - """ - return distribution(distribution_name).version - - -def entry_points(): - """Return EntryPoint objects for all installed packages. - - :return: EntryPoint objects for all installed packages. - """ - eps = itertools.chain.from_iterable( - dist.entry_points for dist in distributions()) - by_group = operator.attrgetter('group') - ordered = sorted(eps, key=by_group) - grouped = itertools.groupby(ordered, by_group) - return { - group: tuple(eps) - for group, eps in grouped - } - - -def files(distribution_name): - """Return a list of files for the named package. 
- - :param distribution_name: The name of the distribution package to query. - :return: List of files composing the distribution. - """ - return distribution(distribution_name).files - - -def requires(distribution_name): - """ - Return a list of requirements for the named package. - - :return: An iterator of requirements, suitable for - packaging.requirement.Requirement. - """ - return distribution(distribution_name).requires diff --git a/metaflow/_vendor/v3_5/importlib_metadata/_compat.py b/metaflow/_vendor/v3_5/importlib_metadata/_compat.py deleted file mode 100644 index 303d4a22e85..00000000000 --- a/metaflow/_vendor/v3_5/importlib_metadata/_compat.py +++ /dev/null @@ -1,152 +0,0 @@ -from __future__ import absolute_import, unicode_literals - -import io -import abc -import sys -import email - - -if sys.version_info > (3,): # pragma: nocover - import builtins - from configparser import ConfigParser - import contextlib - FileNotFoundError = builtins.FileNotFoundError - IsADirectoryError = builtins.IsADirectoryError - NotADirectoryError = builtins.NotADirectoryError - PermissionError = builtins.PermissionError - map = builtins.map - from itertools import filterfalse -else: # pragma: nocover - from backports.configparser import ConfigParser - from itertools import imap as map # type: ignore - from itertools import ifilterfalse as filterfalse - import contextlib2 as contextlib - FileNotFoundError = IOError, OSError - IsADirectoryError = IOError, OSError - NotADirectoryError = IOError, OSError - PermissionError = IOError, OSError - -str = type('') - -suppress = contextlib.suppress - -if sys.version_info > (3, 5): # pragma: nocover - import pathlib -else: # pragma: nocover - import pathlib2 as pathlib - -try: - ModuleNotFoundError = builtins.FileNotFoundError -except (NameError, AttributeError): # pragma: nocover - ModuleNotFoundError = ImportError # type: ignore - - -if sys.version_info >= (3,): # pragma: nocover - from importlib.abc import MetaPathFinder -else: # 
pragma: nocover - class MetaPathFinder(object): - __metaclass__ = abc.ABCMeta - - -__metaclass__ = type -__all__ = [ - 'install', 'NullFinder', 'MetaPathFinder', 'ModuleNotFoundError', - 'pathlib', 'ConfigParser', 'map', 'suppress', 'FileNotFoundError', - 'NotADirectoryError', 'email_message_from_string', - ] - - -def install(cls): - """ - Class decorator for installation on sys.meta_path. - - Adds the backport DistributionFinder to sys.meta_path and - attempts to disable the finder functionality of the stdlib - DistributionFinder. - """ - sys.meta_path.append(cls()) - disable_stdlib_finder() - return cls - - -def disable_stdlib_finder(): - """ - Give the backport primacy for discovering path-based distributions - by monkey-patching the stdlib O_O. - - See #91 for more background for rationale on this sketchy - behavior. - """ - def matches(finder): - return ( - getattr(finder, '__module__', None) == '_frozen_importlib_external' - and hasattr(finder, 'find_distributions') - ) - for finder in filter(matches, sys.meta_path): # pragma: nocover - del finder.find_distributions - - -class NullFinder: - """ - A "Finder" (aka "MetaClassFinder") that never finds any modules, - but may find distributions. - """ - @staticmethod - def find_spec(*args, **kwargs): - return None - - # In Python 2, the import system requires finders - # to have a find_module() method, but this usage - # is deprecated in Python 3 in favor of find_spec(). - # For the purposes of this finder (i.e. being present - # on sys.meta_path but having no other import - # system functionality), the two methods are identical. - find_module = find_spec - - -def py2_message_from_string(text): # nocoverpy3 - # Work around https://bugs.python.org/issue25545 where - # email.message_from_string cannot handle Unicode on Python 2. 
- io_buffer = io.StringIO(text) - return email.message_from_file(io_buffer) - - -email_message_from_string = ( - py2_message_from_string - if sys.version_info < (3,) else - email.message_from_string - ) - - -class PyPy_repr: - """ - Override repr for EntryPoint objects on PyPy to avoid __iter__ access. - Ref #97, #102. - """ - affected = hasattr(sys, 'pypy_version_info') - - def __compat_repr__(self): # pragma: nocover - def make_param(name): - value = getattr(self, name) - return '{name}={value!r}'.format(**locals()) - params = ', '.join(map(make_param, self._fields)) - return 'EntryPoint({params})'.format(**locals()) - - if affected: # pragma: nocover - __repr__ = __compat_repr__ - del affected - - -# from itertools recipes -def unique_everseen(iterable): # pragma: nocover - "List unique elements, preserving order. Remember all elements ever seen." - seen = set() - seen_add = seen.add - - for element in filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - - -unique_ordered = ( - unique_everseen if sys.version_info < (3, 7) else dict.fromkeys) diff --git a/metaflow/_vendor/v3_5/zipp.LICENSE b/metaflow/_vendor/v3_5/zipp.LICENSE deleted file mode 100644 index 353924be0e5..00000000000 --- a/metaflow/_vendor/v3_5/zipp.LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright Jason R. Coombs - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to -deal in the Software without restriction, including without limitation the -rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. diff --git a/metaflow/_vendor/v3_5/zipp.py b/metaflow/_vendor/v3_5/zipp.py deleted file mode 100644 index 26b723c1fd3..00000000000 --- a/metaflow/_vendor/v3_5/zipp.py +++ /dev/null @@ -1,329 +0,0 @@ -import io -import posixpath -import zipfile -import itertools -import contextlib -import sys -import pathlib - -if sys.version_info < (3, 7): - from collections import OrderedDict -else: - OrderedDict = dict - - -__all__ = ['Path'] - - -def _parents(path): - """ - Given a path with elements separated by - posixpath.sep, generate all parents of that path. - - >>> list(_parents('b/d')) - ['b'] - >>> list(_parents('/b/d/')) - ['/b'] - >>> list(_parents('b/d/f/')) - ['b/d', 'b'] - >>> list(_parents('b')) - [] - >>> list(_parents('')) - [] - """ - return itertools.islice(_ancestry(path), 1, None) - - -def _ancestry(path): - """ - Given a path with elements separated by - posixpath.sep, generate all elements of that path - - >>> list(_ancestry('b/d')) - ['b/d', 'b'] - >>> list(_ancestry('/b/d/')) - ['/b/d', '/b'] - >>> list(_ancestry('b/d/f/')) - ['b/d/f', 'b/d', 'b'] - >>> list(_ancestry('b')) - ['b'] - >>> list(_ancestry('')) - [] - """ - path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: - yield path - path, tail = posixpath.split(path) - - -_dedupe = OrderedDict.fromkeys -"""Deduplicate an iterable in original order""" - - -def _difference(minuend, subtrahend): - """ - Return items in minuend not in subtrahend, retaining order - with O(1) lookup. 
- """ - return itertools.filterfalse(set(subtrahend).__contains__, minuend) - - -class CompleteDirs(zipfile.ZipFile): - """ - A ZipFile subclass that ensures that implied directories - are always included in the namelist. - """ - - @staticmethod - def _implied_dirs(names): - parents = itertools.chain.from_iterable(map(_parents, names)) - as_dirs = (p + posixpath.sep for p in parents) - return _dedupe(_difference(as_dirs, names)) - - def namelist(self): - names = super(CompleteDirs, self).namelist() - return names + list(self._implied_dirs(names)) - - def _name_set(self): - return set(self.namelist()) - - def resolve_dir(self, name): - """ - If the name represents a directory, return that name - as a directory (with the trailing slash). - """ - names = self._name_set() - dirname = name + '/' - dir_match = name not in names and dirname in names - return dirname if dir_match else name - - @classmethod - def make(cls, source): - """ - Given a source (filename or zipfile), return an - appropriate CompleteDirs subclass. - """ - if isinstance(source, CompleteDirs): - return source - - if not isinstance(source, zipfile.ZipFile): - return cls(_pathlib_compat(source)) - - # Only allow for FastLookup when supplied zipfile is read-only - if 'r' not in source.mode: - cls = CompleteDirs - - source.__class__ = cls - return source - - -class FastLookup(CompleteDirs): - """ - ZipFile subclass to ensure implicit - dirs exist and are resolved rapidly. - """ - - def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super(FastLookup, self).namelist() - return self.__names - - def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super(FastLookup, self)._name_set() - return self.__lookup - - -def _pathlib_compat(path): - """ - For path-like objects, convert to a filename for compatibility - on Python 3.6.1 and earlier. 
- """ - try: - return path.__fspath__() - except AttributeError: - return str(path) - - -class Path: - """ - A pathlib-compatible interface for zip files. - - Consider a zip file with this structure:: - - . - ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt - - >>> data = io.BytesIO() - >>> zf = zipfile.ZipFile(data, 'w') - >>> zf.writestr('a.txt', 'content of a') - >>> zf.writestr('b/c.txt', 'content of c') - >>> zf.writestr('b/d/e.txt', 'content of e') - >>> zf.filename = 'mem/abcde.zip' - - Path accepts the zipfile object itself or a filename - - >>> root = Path(zf) - - From there, several path operations are available. - - Directory iteration (including the zip file itself): - - >>> a, b = root.iterdir() - >>> a - Path('mem/abcde.zip', 'a.txt') - >>> b - Path('mem/abcde.zip', 'b/') - - name property: - - >>> b.name - 'b' - - join with divide operator: - - >>> c = b / 'c.txt' - >>> c - Path('mem/abcde.zip', 'b/c.txt') - >>> c.name - 'c.txt' - - Read text: - - >>> c.read_text() - 'content of c' - - existence: - - >>> c.exists() - True - >>> (b / 'missing.txt').exists() - False - - Coercion to string: - - >>> import os - >>> str(c).replace(os.sep, posixpath.sep) - 'mem/abcde.zip/b/c.txt' - - At the root, ``name``, ``filename``, and ``parent`` - resolve to the zipfile. Note these attributes are not - valid and will raise a ``ValueError`` if the zipfile - has no filename. - - >>> root.name - 'abcde.zip' - >>> str(root.filename).replace(os.sep, posixpath.sep) - 'mem/abcde.zip' - >>> str(root.parent) - 'mem' - """ - - __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" - - def __init__(self, root, at=""): - """ - Construct a Path from a ZipFile or filename. - - Note: When the source is an existing ZipFile object, - its type (__class__) will be mutated to a - specialized type. If the caller wishes to retain the - original type, the caller should either create a - separate ZipFile object or pass a filename. 
- """ - self.root = FastLookup.make(root) - self.at = at - - def open(self, mode='r', *args, pwd=None, **kwargs): - """ - Open this entry as text or binary following the semantics - of ``pathlib.Path.open()`` by passing arguments through - to io.TextIOWrapper(). - """ - if self.is_dir(): - raise IsADirectoryError(self) - zip_mode = mode[0] - if not self.exists() and zip_mode == 'r': - raise FileNotFoundError(self) - stream = self.root.open(self.at, zip_mode, pwd=pwd) - if 'b' in mode: - if args or kwargs: - raise ValueError("encoding args invalid for binary operation") - return stream - return io.TextIOWrapper(stream, *args, **kwargs) - - @property - def name(self): - return pathlib.Path(self.at).name or self.filename.name - - @property - def suffix(self): - return pathlib.Path(self.at).suffix or self.filename.suffix - - @property - def suffixes(self): - return pathlib.Path(self.at).suffixes or self.filename.suffixes - - @property - def stem(self): - return pathlib.Path(self.at).stem or self.filename.stem - - @property - def filename(self): - return pathlib.Path(self.root.filename).joinpath(self.at) - - def read_text(self, *args, **kwargs): - with self.open('r', *args, **kwargs) as strm: - return strm.read() - - def read_bytes(self): - with self.open('rb') as strm: - return strm.read() - - def _is_child(self, path): - return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") - - def _next(self, at): - return self.__class__(self.root, at) - - def is_dir(self): - return not self.at or self.at.endswith("/") - - def is_file(self): - return self.exists() and not self.is_dir() - - def exists(self): - return self.at in self.root._name_set() - - def iterdir(self): - if not self.is_dir(): - raise ValueError("Can't listdir a file") - subs = map(self._next, self.root.namelist()) - return filter(self._is_child, subs) - - def __str__(self): - return posixpath.join(self.root.filename, self.at) - - def __repr__(self): - return self.__repr.format(self=self) - - def 
joinpath(self, *other): - next = posixpath.join(self.at, *map(_pathlib_compat, other)) - return self._next(self.root.resolve_dir(next)) - - __truediv__ = joinpath - - @property - def parent(self): - if not self.at: - return self.filename.parent - parent_at = posixpath.dirname(self.at.rstrip('/')) - if parent_at: - parent_at += '/' - return self._next(parent_at) diff --git a/metaflow/_vendor/vendor_v3_5.txt b/metaflow/_vendor/vendor_v3_5.txt deleted file mode 100644 index f8c5171d11d..00000000000 --- a/metaflow/_vendor/vendor_v3_5.txt +++ /dev/null @@ -1,2 +0,0 @@ -importlib_metadata==2.1.3 -zipp==3.6.0 diff --git a/metaflow/cmd/develop/stubs.py b/metaflow/cmd/develop/stubs.py index 64bdc7b5a16..4e6f144d8b2 100644 --- a/metaflow/cmd/develop/stubs.py +++ b/metaflow/cmd/develop/stubs.py @@ -12,25 +12,13 @@ from .stub_generator import StubGenerator _py_ver = sys.version_info[:2] -_metadata_package = None - -def _check_stubs_supported(): - global _metadata_package - if _metadata_package is not None: - return _metadata_package - else: - if _py_ver >= (3, 4): - if _py_ver >= (3, 8): - from importlib import metadata - elif _py_ver >= (3, 7): - from metaflow._vendor import importlib_metadata as metadata - elif _py_ver >= (3, 6): - from metaflow._vendor.v3_6 import importlib_metadata as metadata - else: - from metaflow._vendor.v3_5 import importlib_metadata as metadata - _metadata_package = metadata - return _metadata_package +if _py_ver >= (3, 8): + from importlib import metadata +elif _py_ver >= (3, 7): + from metaflow._vendor import importlib_metadata as metadata +else: + from metaflow._vendor.v3_6 import importlib_metadata as metadata @develop.group(short_help="Stubs management") @@ -45,12 +33,6 @@ def stubs(ctx: Any): This CLI provides utilities to check and generate stubs for your current Metaflow installation. 
""" - if _check_stubs_supported() is None: - raise click.UsageError( - "Building and installing stubs are not supported on Python %d.%d " - "(3.4 minimum required)" % _py_ver, - ctx=ctx, - ) @stubs.command(short_help="Check validity of stubs") @@ -330,14 +312,14 @@ def get_packages_for_stubs() -> Tuple[List[Tuple[str, str]], List[str]]: # some reason it shows up multiple times. interesting_dists = [ d - for d in _metadata_package.distributions() + for d in metadata.distributions() if any( [ p == "metaflow-stubs" for p in (d.read_text("top_level.txt") or "").split() ] ) - and isinstance(d, _metadata_package.PathDistribution) + and isinstance(d, metadata.PathDistribution) ] for dist in interesting_dists: diff --git a/metaflow/extension_support/__init__.py b/metaflow/extension_support/__init__.py index f1f25b7777e..6c6e4c860e9 100644 --- a/metaflow/extension_support/__init__.py +++ b/metaflow/extension_support/__init__.py @@ -103,9 +103,6 @@ def load_module(module_name): def get_modules(extension_point): modules_to_load = [] - if not _mfext_supported: - _ext_debug("Not supported for your Python version -- 3.4+ is needed") - return [] if extension_point not in _extension_points: raise RuntimeError( "Metaflow extension point '%s' not supported" % extension_point @@ -146,9 +143,6 @@ def dump_module_info(all_packages=None, pkgs_per_extension_point=None): def get_extensions_in_dir(d): - if not _mfext_supported: - _ext_debug("Not supported for your Python version -- 3.4+ is needed") - return None, None return _get_extension_packages(ignore_info_file=True, restrict_to_directories=[d]) @@ -312,21 +306,16 @@ def multiload_all(modules, extension_point, dst_globals): _py_ver = sys.version_info[:2] -_mfext_supported = False _aliased_modules = [] -if _py_ver >= (3, 4): - import importlib.util +import importlib.util - if _py_ver >= (3, 8): - from importlib import metadata - elif _py_ver >= (3, 7): - from metaflow._vendor import importlib_metadata as metadata - elif _py_ver >= (3, 
6): - from metaflow._vendor.v3_6 import importlib_metadata as metadata - else: - from metaflow._vendor.v3_5 import importlib_metadata as metadata - _mfext_supported = True +if _py_ver >= (3, 8): + from importlib import metadata +elif _py_ver >= (3, 7): + from metaflow._vendor import importlib_metadata as metadata +else: + from metaflow._vendor.v3_6 import importlib_metadata as metadata # Extension points are the directories that can be present in a EXT_PKG to # contribute to that extension point. For example, if you have @@ -355,10 +344,6 @@ def _ext_debug(*args, **kwargs): def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None): - if not _mfext_supported: - _ext_debug("Not supported for your Python version -- 3.4+ is needed") - return {}, {} - # If we have an INFO file with the appropriate information (if running from a saved # code package for example), we use that directly # Pre-compute on _extension_points @@ -381,12 +366,11 @@ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None try: extensions_module = importlib.import_module(EXT_PKG) except ImportError as e: - if _py_ver >= (3, 6): - # e.name is set to the name of the package that fails to load - # so don't error ONLY IF the error is importing this module (but do - # error if there is a transitive import error) - if not (isinstance(e, ModuleNotFoundError) and e.name == EXT_PKG): - raise + # e.name is set to the name of the package that fails to load + # so don't error ONLY IF the error is importing this module (but do + # error if there is a transitive import error) + if not (isinstance(e, ModuleNotFoundError) and e.name == EXT_PKG): + raise return {}, {} if restrict_to_directories: @@ -894,20 +878,19 @@ def _attempt_load_module(module_name): try: extension_module = importlib.import_module(module_name) except ImportError as e: - if _py_ver >= (3, 6): - # e.name is set to the name of the package that fails to load - # so don't error ONLY IF the error is 
importing this module (but do - # error if there is a transitive import error) - errored_names = [EXT_PKG] - parts = module_name.split(".") - for p in parts[1:]: - errored_names.append("%s.%s" % (errored_names[-1], p)) - if not (isinstance(e, ModuleNotFoundError) and e.name in errored_names): - print( - "The following exception occurred while trying to load '%s' ('%s')" - % (EXT_PKG, module_name) - ) - raise + # e.name is set to the name of the package that fails to load + # so don't error ONLY IF the error is importing this module (but do + # error if there is a transitive import error) + errored_names = [EXT_PKG] + parts = module_name.split(".") + for p in parts[1:]: + errored_names.append("%s.%s" % (errored_names[-1], p)) + if not (isinstance(e, ModuleNotFoundError) and e.name in errored_names): + print( + "The following exception occurred while trying to load '%s' ('%s')" + % (EXT_PKG, module_name) + ) + raise _ext_debug(" Unknown error when loading '%s': %s" % (module_name, e)) return None else: diff --git a/metaflow/package/mfenv.py b/metaflow/package/mfenv.py index 6c5ad8c6527..28e49f38b35 100644 --- a/metaflow/package/mfenv.py +++ b/metaflow/package/mfenv.py @@ -6,7 +6,6 @@ import tarfile from collections import defaultdict -from dataclasses import dataclass from pathlib import Path from typing import ( Callable, @@ -15,6 +14,7 @@ Iterator, List, Mapping, + NamedTuple, Optional, Set, Tuple, @@ -103,11 +103,9 @@ def modules_to_distributions() -> Dict[str, List[metadata.Distribution]]: return _cached_distributions -@dataclass -class _ModuleInfo: - name: str - root_paths: Set[str] - module: ModuleType +_ModuleInfo = NamedTuple( + "_ModuleInfo", [("name", str), ("root_paths", Set[str]), ("module", ModuleType)] +) class PackagedDistribution(metadata.Distribution): @@ -129,7 +127,9 @@ def read_text(self, filename: Union[str, os.PathLike[str]]) -> Optional[str]: read_text.__doc__ = metadata.Distribution.read_text.__doc__ - def locate_file(self, path: Union[str, 
os.PathLike[str]]) -> metadata.SimplePath: + # Returns a metadata.SimplePath but not always present in importlib.metadata libs so + # skipping return type. + def locate_file(self, path: Union[str, os.PathLike]): return self._root / path diff --git a/metaflow/plugins/pypi/bootstrap.py b/metaflow/plugins/pypi/bootstrap.py index 1dd0991f3ec..584a9950fe0 100644 --- a/metaflow/plugins/pypi/bootstrap.py +++ b/metaflow/plugins/pypi/bootstrap.py @@ -14,7 +14,7 @@ from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_USE_FAST_INIT from metaflow.plugins import DATASTORES from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL -from metaflow.util import which +from metaflow.util import get_metaflow_root, which from urllib.request import Request import warnings @@ -366,7 +366,7 @@ def fast_setup_environment(architecture, storage, env, prefix, pkgs_dir): # Move MAGIC_FILE inside local datastore. os.makedirs(manifest_dir, exist_ok=True) shutil.move( - os.path.join(os.getcwd(), MAGIC_FILE), + os.path.join(get_metaflow_root(), MAGIC_FILE), os.path.join(manifest_dir, MAGIC_FILE), ) with open(os.path.join(manifest_dir, MAGIC_FILE)) as f: diff --git a/metaflow/vendor.py b/metaflow/vendor.py index 56b952324cc..ab54aa1f806 100644 --- a/metaflow/vendor.py +++ b/metaflow/vendor.py @@ -11,7 +11,6 @@ "README.txt", "__init__.py", "vendor_any.txt", - "vendor_v3_5.txt", "vendor_v3_6.txt", "pip.LICENSE", }