Skip to content

Commit e65436d

Browse files
authored
Merge pull request #5 from mcg1969/better-pip
Better pip support
2 parents 0b5818e + ac7d77a commit e65436d

33 files changed

+777
-607
lines changed

conda-recipe/meta.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ requirements:
1818
run:
1919
- python {{ python }}
2020
- pandas
21+
- setuptools
2122

2223
test:
2324
source_files:

project_inspect/environments.py

Lines changed: 77 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .utils import load_file
1414

1515
import logging
16+
import pkg_resources
1617
logger = logging.getLogger(__name__)
1718

1819
__all__ = ['environment_by_prefix', 'kernel_name_to_prefix']
@@ -32,6 +33,7 @@ def get_python_builtins(pybin):
3233
Returns:
3334
set: a set of module names.
3435
'''
36+
3537
try:
3638
cmd = [pybin, '-c', 'import sys, json; print(json.dumps(sys.builtin_module_names))']
3739
pycall = subprocess.check_output(cmd)
@@ -42,78 +44,23 @@ def get_python_builtins(pybin):
4244
return set(sys.builtin_module_names)
4345

4446

45-
def parse_egg_info(path):
47+
def get_python_path(prefix):
4648
'''
47-
Returns the name, version, and key file list for a pip package.
49+
Determines the standard python path for a given prefix.
4850
4951
Args:
50-
path (str): path to the egg file or directory
52+
prefix (str): path to the base of the Python environment.
5153
Returns:
52-
name (str): the name of the package.
53-
version (str): the version of the package.
54-
files (list of str): a list of the Python files found in the
55-
manifest file (SOURCES.txt, RECORD), if such a file is found.
56-
If the manifest is not found, an empty list is returned.
54+
results: a list of paths.
5755
'''
58-
name = basename(path).rsplit('.', 1)[0]
59-
if path.endswith('.egg-link'):
60-
data = load_file(path)
61-
if data is not None:
62-
spdir = fp.read().splitlines()[0].strip()
63-
name = name.replace('-', '_')
64-
if data is not None:
65-
path = join(spdir, name + '.egg-info')
66-
version = '<dev>'
67-
else:
68-
spdir = dirname(path)
69-
name, version = name.rsplit('-', 2)[:2]
70-
pdata = {'name': name.lower(),
71-
'version': version,
72-
'build': '<pip>',
73-
'depends': set(),
74-
'modules': {'python': set(), 'r': set()}}
75-
if path.endswith('.egg'):
76-
pdata['modules']['python'].update(get_python_importables(path))
77-
path = join(path, 'EGG-INFO')
78-
else:
79-
tops = [name]
80-
tname = name.split('.', 1)[0]
81-
tlpath = join(path, 'top_level.txt')
82-
if exists(tlpath):
83-
tldata = load_file(tlpath)
84-
if tldata is not None:
85-
tops.extend(line for line in map(str.rstrip, tldata.splitlines())
86-
if line and line != tname)
87-
for top in tops:
88-
mparts = top.split('.')
89-
level = len(mparts)
90-
mpath = join(spdir, *mparts)
91-
pdata['modules']['python'].update(get_python_importables(mpath, level=level))
92-
fname = 'METADATA' if path.endswith('.dist-info') else 'PKG-INFO'
93-
fpath = join(path, fname)
94-
info = {}
95-
if isfile(fpath):
96-
data = load_file(fpath) or ''
97-
for line in data.splitlines():
98-
m = re.match(r'(\w+):\s*(\S+)', line, re.I)
99-
if m:
100-
key = m.group(1).lower()
101-
info.setdefault(key, []).append(m.group(2))
102-
break
103-
if 'Requires-Dist' in info:
104-
pdata['depends'].update(x.split(' ', 1)[0].lower() for x in info['requires-dist'])
105-
else:
106-
req_txt = join(path, 'requires.txt')
107-
if exists(req_txt):
108-
data = load_file(req_txt) or ''
109-
for dep in data.splitlines():
110-
m = re.match(r'^([\w_-]+)', dep)
111-
if not m:
112-
break
113-
pdata['depends'].add(m.groups()[0].lower())
114-
return pdata
11556

116-
57+
results = (glob(join(prefix, 'lib', 'python*.zip')) +
58+
glob(join(prefix, 'lib', 'python?.?')) +
59+
glob(join(prefix, 'lib', 'python?.?', 'lib-dynload')) +
60+
glob(join(prefix, 'lib', 'python?.?', 'site-packages')))
61+
return results
62+
63+
11764
def parse_conda_meta(mpath):
11865
mdata = load_file(mpath) or {}
11966
fname, fversion, fbuild = basename(mpath).rsplit('.', 1)[0].rsplit('-', 2)
@@ -164,22 +111,51 @@ def get_eggs(sp_dir):
164111
Returns:
165112
list: a list of the egg files/dirs found in that directory.
166113
'''
167-
results = []
114+
results = {}
168115
for fn in os.listdir(sp_dir):
169-
if not fn.endswith(('.egg', '.egg-info', '.dist-info', '.egg-link')):
116+
if not fn.endswith(('.egg-info', '.dist-info', '.egg', '.egg-link')):
170117
continue
171-
path = join(sp_dir, fn)
172-
if fn.endswith('.egg-link'):
173-
data = load_file(path)
174-
if data is not None:
175-
sp_dir = data.splitlines()[0].strip()
176-
name = fn.rsplit('.', 1)[0].replace('-', '_')
177-
path = join(sp_dir, name + '.egg-info')
178-
if (isfile(path) or exists(join(path, 'METADATA')) or
179-
exists(join(path, 'PKG-INFO')) or
180-
exists(join(path, 'METADATA'))):
181-
results.append(fn)
182-
return set(results)
118+
fullpath = os.path.join(sp_dir, fn)
119+
factory = pkg_resources.dist_factory(sp_dir, fn, False)
120+
try:
121+
dists = list(factory(fullpath))
122+
except Exception as e:
123+
logger.warning('Error reading eggs in {}:\n{}'.format(fullpath, e))
124+
dists = []
125+
pdata = {'name': None,
126+
'version': None,
127+
'build': '<pip>',
128+
'depends': set(),
129+
'modules': {'python': set(), 'r': set()}}
130+
results[fn] = pdata
131+
for dist in dists:
132+
if pdata['name'] is None:
133+
pdata['name'] = dist.project_name
134+
pdata['version'] = dist.version or '<dev>'
135+
pdata['depends'].update(r.name for r in dist.requires())
136+
sources = 'RECORD' if dist.has_metadata('RECORD') else 'SOURCES.txt'
137+
if dist.has_metadata(sources) and dist.has_metadata('top_level.txt'):
138+
sources = list(map(str.strip, dist.get_metadata(sources).splitlines()))
139+
top_level = list(map(str.strip, dist.get_metadata('top_level.txt').splitlines()))
140+
for top in top_level:
141+
top_s = top + '/'
142+
for src in sources:
143+
src = src.split(',', 1)[0]
144+
if src.endswith('__init__.py'):
145+
src = dirname(src)
146+
elif src.endswith(('.py', '.so')):
147+
src = src[:-3]
148+
else:
149+
continue
150+
pdata['modules']['python'].add(src.replace('/', '.'))
151+
if not pdata['name']:
152+
name, version = fn.rsplit('.', 1)[0], '<dev>'
153+
if fn.endswith('.dist-info'):
154+
name, version = fn.rsplit('-', 1)
155+
elif fn.endswith('.egg-info'):
156+
name, version, _ = fn.rsplit('-', 2)
157+
pdata['name'], pdata['version'] = name, version
158+
return results
183159

184160

185161
@functools.lru_cache()
@@ -194,6 +170,8 @@ def get_python_importables(path, level=0):
194170
gen = [(dirname(path), [], [basename(path) + sfx])]
195171
path = dirname(path)
196172
level -= 1
173+
else:
174+
return modules
197175
root_path = path.rstrip('/')
198176
while level > 0:
199177
root_path = dirname(root_path)
@@ -207,13 +185,7 @@ def get_python_importables(path, level=0):
207185
if file.startswith('.'):
208186
continue
209187
fpath = join(root, file)
210-
if file.endswith('.pth'):
211-
data = load_file(fpath) or ''
212-
for npath in map(str.strip, data.splitlines()):
213-
if npath:
214-
npath = abspath(join(root, npath.strip()))
215-
modules.update(get_python_importables(npath))
216-
elif file == '__init__.py':
188+
if file == '__init__.py':
217189
modules[base_module] = fpath
218190
elif file.endswith(('.so', '.py')):
219191
file = file.rsplit('.', 1)[0]
@@ -234,12 +206,20 @@ def _create(bname):
234206
'modules': {'python': set(), 'r': set()},
235207
'imports': {'python': set(), 'r': set()}}
236208
return packages[bname]
209+
all_modules = {}
210+
for pdata in get_eggs(path).values():
211+
packages[pdata['name']] = pdata
212+
pdata['build'] = '<local>'
213+
pdata['imports'] = {'python': set(), 'r': set()}
214+
all_modules.update((k, pdata['name']) for k in pdata['modules']['python'])
237215
for module, fpath in get_python_importables(path).items():
238-
bname = './' + module.split('.', 1)[0]
239-
if exists(join(path, bname) + '.py'):
240-
bname += '.py'
241-
pdata = _create(bname)
216+
bname = all_modules.get(module)
217+
if bname is None:
218+
bname = './' + module.split('.', 1)[0]
219+
if exists(join(path, bname) + '.py'):
220+
bname += '.py'
242221
imports, _ = find_file_imports(fpath, submodules=True)
222+
pdata = _create(bname)
243223
pdata['modules']['python'].add(module)
244224
pdata['imports']['python'].update(imports)
245225
for fpath in glob(join(path, '*.R')) + glob(join(path, '*.ipynb')):
@@ -295,13 +275,14 @@ def environment_by_prefix(envdir, local=None):
295275

296276
# Find all non-conda egg directories and determine package name and version
297277
# If a manifest exists, use that to remove imports from the unmanaged list
298-
eggfiles = set()
278+
eggfiles = {}
299279
for spdir in glob(join(envdir, 'lib', 'python*', 'site-packages')):
300-
eggfiles = get_eggs(spdir)
280+
eggfiles.update(get_eggs(spdir))
301281
for pdata in packages.values():
302-
eggfiles -= pdata['eggs']
303-
for eggfile in eggfiles:
304-
pdata = parse_egg_info(join(spdir, eggfile))
282+
for egg in pdata['eggs']:
283+
if egg in eggfiles:
284+
del eggfiles[egg]
285+
for eggfile, pdata in eggfiles.items():
305286
pname = pdata['name']
306287
packages[pdata['name']] = pdata
307288
for language, mdata in pdata['modules'].items():

tests/darwin/all_all.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
n_owners,n_projects,n_environments,n_required,n_requested,n_python,n_r
2-
2,4,6,468,35,6,4
2+
2,4,6,468,36,6,4

tests/darwin/all_package.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ scikit-learn,2,2,2,0,0
397397
scipy,2,3,3,1,0
398398
seaborn,2,2,2,0,0
399399
send2trash,2,4,6,0,0
400-
setuptools,2,4,6,6,0
400+
setuptools,2,4,6,6,1
401401
simplegeneric,2,3,3,1,0
402402
singledispatch,2,3,3,1,0
403403
sip,2,3,3,0,0

0 commit comments

Comments
 (0)