Skip to content

Commit a665e58

Browse files
committed
Capture maintainers of packages too
1 parent fc3fcc5 commit a665e58

File tree

4 files changed

+46
-4
lines changed

4 files changed

+46
-4
lines changed

.dockerignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
venv
2+
*.db
3+

Dockerfile

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
FROM python:3.9-slim
2+
3+
WORKDIR /pypi-data
4+
COPY . /pypi-data
5+
RUN python -m pip install -r requirements.txt
6+
CMD python main.py

README.md

+15-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ Mostly up-to-date data about almost every package on PyPI
55
Get access to the database via [GitHub releases](https://github.com/sethmlarson/pypi-data/releases).
66

77
```console
8-
$ git clone https://github.com/sethmlarson/pypi-data
9-
$ cd pypi-data
8+
$ gunzip pypi.db.gz
109
$ sqlite3 'pypi.db' 'SELECT * FROM packages LIMIT 10 OFFSET 1000;'
1110

1211
acid-vault|1.3.2|>=3.6|1|0|2021-01-21 04:37:10
@@ -29,6 +28,7 @@ ackg|0.0.5||0|0|2021-01-21 04:37:10
2928
- Python requirements (`requires_python`)
3029
- Yanked versions (`yanked`)
3130
- Wheel data (`python_tags`, `abi_tags`, `platform_tags`)
31+
- Maintainers on PyPI
3232

3333
### Database Schemas
3434

@@ -64,6 +64,19 @@ CREATE TABLE wheels (
6464
abi STRING,
6565
platform STRING
6666
);
67+
68+
-- Maintainer data --
69+
CREATE TABLE maintainers (
70+
name STRING,
71+
package_name STRING
72+
);
73+
```
74+
75+
## Running locally
76+
77+
```
78+
$ docker build -t pypi-data .
79+
$ docker run --rm pypi-data
6780
```
6881

6982
## License

main.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@
7070
);
7171
"""
7272
)
73+
db.execute("""
74+
CREATE TABLE IF NOT EXISTS maintainers (
75+
name STRING,
76+
package_name STRING
77+
);
78+
""")
7379
db.commit()
7480

7581

@@ -179,10 +185,20 @@ def get_metadata_by_install(package, resp):
179185
resp = resp.copy()
180186
resp["info"]["requires_dist"] = package_metadata["requires_dist"]
181187
resp["info"]["requires_python"] = package_metadata["requires_python"]
182-
print(package_metadata)
183188
return resp
184189

185190

191+
def get_maintainers_from_pypi(package: str):
    """Return the set of PyPI usernames linked from a project's page.

    Fetches ``https://pypi.org/project/<package>`` through the module-level
    ``http`` client (presumably a urllib3 pool manager -- confirm against
    the top of the file) and collects every ``/user/<name>/`` link that is
    followed by an ``aria-label`` attribute.

    The request is attempted at most five times:

    * 200 -- the page is decoded as UTF-8 and the usernames are returned.
    * 404 -- the project page does not exist; an empty set is returned.
    * anything else -- the request is retried.

    If no attempt yields a 200 or 404, an empty set is returned.
    """
    project_url = f"https://pypi.org/project/{package}"
    user_link_pattern = r"<a href=\"/user/([^/]+)/\" aria-label="

    attempts_left = 5
    while attempts_left > 0:
        attempts_left -= 1
        page = http.request("GET", project_url)

        if page.status == 200:
            html = page.data.decode("utf-8")
            usernames = re.findall(user_link_pattern, html)
            return set(usernames)

        if page.status == 404:
            # Nothing to scrape for a missing project; do not retry.
            return set()

        # Any other status is retried on the next pass.

    # Every attempt returned an unexpected status.
    return set()
200+
201+
186202
def update_data_from_pypi():
187203
for package in tqdm(packages, unit="packages"):
188204
resp = http.request("GET", f"https://pypi.org/pypi/{package}/json")
@@ -289,6 +305,11 @@ def update_data_from_pypi():
289305
)
290306
db.commit()
291307

308+
for maintainer in get_maintainers_from_pypi(package):
309+
db.execute("""
310+
INSERT OR IGNORE INTO maintainers (name, package_name) VALUES (?, ?);
311+
""", (maintainer, package))
312+
292313
for req in urequires_dist:
293314
extras = get_extras(req)
294315
req_no_specifiers = dist_from_requires_dist(req)
@@ -341,5 +362,4 @@ def update_data_from_pypi():
341362

342363
db.commit()
343364

344-
345365
update_data_from_pypi()

0 commit comments

Comments
 (0)