mcg1969
diff --git a/‎conda-recipe/meta.yaml‎
Lines changed: 1 addition & 0 deletions b/‎conda-recipe/meta.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎project_inspect/environments.py‎
Lines changed: 77 additions & 96 deletions b/‎project_inspect/environments.py‎
Lines changed: 77 additions & 96 deletions
diff --git a/‎tests/darwin/all_all.csv‎
Lines changed: 1 addition & 1 deletion b/‎tests/darwin/all_all.csv‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/darwin/all_package.csv‎
Lines changed: 1 addition & 1 deletion b/‎tests/darwin/all_package.csv‎
Lines changed: 1 addition & 1 deletion
@@ -18,6 +18,7 @@ requirements:
   run:
     - python {{ python }}
     - pandas
+    - setuptools
 
 test:
   source_files:
 
@@ -13,6 +13,7 @@
 from .utils import load_file
 
 import logging
+import pkg_resources
 logger = logging.getLogger(__name__)
 
 __all__ = ['environment_by_prefix', 'kernel_name_to_prefix']
@@ -32,6 +33,7 @@ def get_python_builtins(pybin):
     Returns:
         set: a set of module names.
     '''
+
     try:
         cmd = [pybin, '-c', 'import sys, json; print(json.dumps(sys.builtin_module_names))']
         pycall = subprocess.check_output(cmd)
@@ -42,78 +44,23 @@ def get_python_builtins(pybin):
         return set(sys.builtin_module_names)
 
 
-def parse_egg_info(path):
+def get_python_path(prefix):
     '''
-    Returns the name, version, and key file list for a pip package.
+    Determines the standard python path for a given prefix.
 
     Args:
-        path (str): path to the egg file or directory
+        prefix (str): path to the base of the Python environment.
     Returns:
-        name (str): the name of the package.
-        version (str): the version of the package.
-        files (list of str): a list of the Python files found in the
-            manifest file (SOURCES.txt, RECORD), if such a file is found.
-            If the manifest is not found, an empty list is returned.
+        results: a list of paths.
     '''
-    name = basename(path).rsplit('.', 1)[0]
-    if path.endswith('.egg-link'):
-        data = load_file(path)
-        if data is not None:
-            spdir = fp.read().splitlines()[0].strip()
-        name = name.replace('-', '_')
-        if data is not None:
-            path = join(spdir, name + '.egg-info')
-        version = '<dev>'
-    else:
-        spdir = dirname(path)
-        name, version = name.rsplit('-', 2)[:2]
-    pdata = {'name': name.lower(),
-             'version': version,
-             'build': '<pip>',
-             'depends': set(),
-             'modules': {'python': set(), 'r': set()}}
-    if path.endswith('.egg'):
-        pdata['modules']['python'].update(get_python_importables(path))
-        path = join(path, 'EGG-INFO')
-    else:
-        tops = [name]
-        tname = name.split('.', 1)[0]
-        tlpath = join(path, 'top_level.txt')
-        if exists(tlpath):
-            tldata = load_file(tlpath)
-            if tldata is not None:
-                tops.extend(line for line in map(str.rstrip, tldata.splitlines())
-                            if line and line != tname)
-        for top in tops:
-            mparts = top.split('.')
-            level = len(mparts)
-            mpath = join(spdir, *mparts)
-            pdata['modules']['python'].update(get_python_importables(mpath, level=level))
-    fname = 'METADATA' if path.endswith('.dist-info') else 'PKG-INFO'
-    fpath = join(path, fname)
-    info = {}
-    if isfile(fpath):
-        data = load_file(fpath) or ''
-        for line in data.splitlines():
-            m = re.match(r'(\w+):\s*(\S+)', line, re.I)
-            if m:
-                key = m.group(1).lower()
-                info.setdefault(key, []).append(m.group(2))
-            break
-    if 'Requires-Dist' in info:
-        pdata['depends'].update(x.split(' ', 1)[0].lower() for x in info['requires-dist'])
-    else:
-        req_txt = join(path, 'requires.txt')
-        if exists(req_txt):
-            data = load_file(req_txt) or ''
-            for dep in data.splitlines():
-                m = re.match(r'^([\w_-]+)', dep)
-                if not m:
-                    break
-                pdata['depends'].add(m.groups()[0].lower())
-    return pdata
 
-    
+    results = (glob(join(prefix, 'lib', 'python*.zip')) +
+               glob(join(prefix, 'lib', 'python?.?')) +
+               glob(join(prefix, 'lib', 'python?.?', 'lib-dynload')) +
+               glob(join(prefix, 'lib', 'python?.?', 'site-packages')))
+    return results
+
+
 def parse_conda_meta(mpath):
     mdata = load_file(mpath) or {}
     fname, fversion, fbuild = basename(mpath).rsplit('.', 1)[0].rsplit('-', 2)
@@ -164,22 +111,51 @@ def get_eggs(sp_dir):
     Returns:
         list: a list of the egg files/dirs found in that directory.
     '''
-    results = []
+    results = {}
     for fn in os.listdir(sp_dir):
-        if not fn.endswith(('.egg', '.egg-info', '.dist-info', '.egg-link')):
+        if not fn.endswith(('.egg-info', '.dist-info', '.egg', '.egg-link')):
             continue
-        path = join(sp_dir, fn)
-        if fn.endswith('.egg-link'):
-            data = load_file(path)
-            if data is not None:
-                sp_dir = data.splitlines()[0].strip()
-            name = fn.rsplit('.', 1)[0].replace('-', '_')
-            path = join(sp_dir, name + '.egg-info')
-        if (isfile(path) or exists(join(path, 'METADATA')) or
-            exists(join(path, 'PKG-INFO')) or
-            exists(join(path, 'METADATA'))):
-            results.append(fn)
-    return set(results)
+        fullpath = os.path.join(sp_dir, fn)
+        factory = pkg_resources.dist_factory(sp_dir, fn, False)
+        try:
+            dists = list(factory(fullpath))
+        except Exception as e:
+            logger.warning('Error reading eggs in {}:\n{}'.format(fullpath, e))
+            dists = []
+        pdata = {'name': None,
+                 'version': None,
+                 'build': '<pip>',
+                 'depends': set(),
+                 'modules': {'python': set(), 'r': set()}}
+        results[fn] = pdata
+        for dist in dists:
+            if pdata['name'] is None:
+                pdata['name'] = dist.project_name
+                pdata['version'] = dist.version or '<dev>'
+            pdata['depends'].update(r.name for r in dist.requires())
+            sources = 'RECORD' if dist.has_metadata('RECORD') else 'SOURCES.txt'
+            if dist.has_metadata(sources) and dist.has_metadata('top_level.txt'):
+                sources = list(map(str.strip, dist.get_metadata(sources).splitlines()))
+                top_level = list(map(str.strip, dist.get_metadata('top_level.txt').splitlines()))
+                for top in top_level:
+                    top_s = top + '/'
+                    for src in sources:
+                            src = src.split(',', 1)[0]
+                            if src.endswith('__init__.py'):
+                                src = dirname(src)
+                            elif src.endswith(('.py', '.so')):
+                                src = src[:-3]
+                            else:
+                                continue
+                            pdata['modules']['python'].add(src.replace('/', '.'))
+        if not pdata['name']:
+            name, version = fn.rsplit('.', 1)[0], '<dev>'
+            if fn.endswith('.dist-info'):
+                name, version = fn.rsplit('-', 1)
+            elif fn.endswith('.egg-info'):
+                name, version, _ = fn.rsplit('-', 2)
+            pdata['name'], pdata['version'] = name, version
+    return results
 
 
 @functools.lru_cache()
@@ -194,6 +170,8 @@ def get_python_importables(path, level=0):
                 gen = [(dirname(path), [], [basename(path) + sfx])]
                 path = dirname(path)
                 level -= 1
+    else:
+        return modules
     root_path = path.rstrip('/')
     while level > 0:
         root_path = dirname(root_path)
@@ -207,13 +185,7 @@ def get_python_importables(path, level=0):
             if file.startswith('.'):
                 continue
             fpath = join(root, file)
-            if file.endswith('.pth'):
-                data = load_file(fpath) or ''
-                for npath in map(str.strip, data.splitlines()):
-                    if npath:
-                        npath = abspath(join(root, npath.strip()))
-                        modules.update(get_python_importables(npath))
-            elif file == '__init__.py':
+            if file == '__init__.py':
                 modules[base_module] = fpath
             elif file.endswith(('.so', '.py')):
                 file = file.rsplit('.', 1)[0]
@@ -234,12 +206,20 @@ def _create(bname):
                                'modules': {'python': set(), 'r': set()},
                                'imports': {'python': set(), 'r': set()}}
         return packages[bname]
+    all_modules = {}
+    for pdata in get_eggs(path).values():
+        packages[pdata['name']] = pdata
+        pdata['build'] = '<local>'
+        pdata['imports'] = {'python': set(), 'r': set()}
+        all_modules.update((k, pdata['name']) for k in pdata['modules']['python'])
     for module, fpath in get_python_importables(path).items():
-        bname = './' + module.split('.', 1)[0]
-        if exists(join(path, bname) + '.py'):
-            bname += '.py'
-        pdata = _create(bname)
+        bname = all_modules.get(module)
+        if bname is None:
+            bname = './' + module.split('.', 1)[0]
+            if exists(join(path, bname) + '.py'):
+                bname += '.py'
         imports, _ = find_file_imports(fpath, submodules=True)
+        pdata = _create(bname)
         pdata['modules']['python'].add(module)
         pdata['imports']['python'].update(imports)
     for fpath in glob(join(path, '*.R')) + glob(join(path, '*.ipynb')):
@@ -295,13 +275,14 @@ def environment_by_prefix(envdir, local=None):
 
     # Find all non-conda egg directories and determine package name and version
     # If a manifest exists, use that to remove imports from the unmanaged list
-    eggfiles = set()
+    eggfiles = {}
     for spdir in glob(join(envdir, 'lib', 'python*', 'site-packages')):
-        eggfiles = get_eggs(spdir)
+        eggfiles.update(get_eggs(spdir))
     for pdata in packages.values():
-        eggfiles -= pdata['eggs']
-    for eggfile in eggfiles:
-        pdata = parse_egg_info(join(spdir, eggfile))
+        for egg in pdata['eggs']:
+            if egg in eggfiles:
+                del eggfiles[egg]
+    for eggfile, pdata in eggfiles.items():
         pname = pdata['name']
         packages[pdata['name']] = pdata
         for language, mdata in pdata['modules'].items():
 
@@ -1,2 +1,2 @@
 n_owners,n_projects,n_environments,n_required,n_requested,n_python,n_r
-2,4,6,468,35,6,4
+2,4,6,468,36,6,4
@@ -397,7 +397,7 @@ scikit-learn,2,2,2,0,0
 scipy,2,3,3,1,0
 seaborn,2,2,2,0,0
 send2trash,2,4,6,0,0
-setuptools,2,4,6,6,0
+setuptools,2,4,6,6,1
 simplegeneric,2,3,3,1,0
 singledispatch,2,3,3,1,0
 sip,2,3,3,0,0
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`n_owners,n_projects,n_environments,n_required,n_requested,n_python,n_r`
`2`		`-2,4,6,468,35,6,4`
	`2`	`+2,4,6,468,36,6,4`