1
1
from __future__ import annotations
2
2
import contextlib
3
- import csv
4
3
import itertools
5
4
import json
6
- import logging
7
5
import os
8
6
import re
9
7
import sqlite3
27
25
28
26
MAX_WORKERS = 16
29
27
GOOGLE_ASSURED_OSS_PACKAGES = set ()
28
+ DOWNLOADS_URL = "https://raw.githubusercontent.com/hugovk/top-pypi-packages/main/top-pypi-packages-30-days.min.json"
30
29
31
30
@contextlib .contextmanager
32
31
def locked_db ():
@@ -254,13 +253,14 @@ def update_data_for_package(package: str) -> None:
254
253
yanked = []
255
254
256
255
releases = resp ["releases" ][str_version ]
257
- uploaded_at = None if not releases else min (x ["upload_time" ] for x in releases )
256
+ first_uploaded_at = None if not releases else min (x ["upload_time" ] for x in releases )
257
+ last_uploaded_at = None if not releases else max (x ["upload_time" ] for x in releases )
258
258
wheel_data = [
259
- (x ["filename" ], x ["url" ]) for x in releases if x ["filename" ].endswith (".whl" )
259
+ (x ["filename" ], x ["url" ], x [ "upload_time" ] ) for x in releases if x ["filename" ].endswith (".whl" )
260
260
]
261
261
has_binary_wheel = False
262
262
263
- for filename , _ in wheel_data :
263
+ for filename , _ , uploaded_at in wheel_data :
264
264
try :
265
265
whl = parse_wheel_filename (filename )
266
266
except InvalidFilenameError :
@@ -276,31 +276,36 @@ def update_data_for_package(package: str) -> None:
276
276
db .execute (
277
277
"""
278
278
INSERT INTO wheels (
279
- package_name, filename, build, python, abi, platform
280
- ) VALUES (?, ?, ?, ?, ?, ?);
279
+ package_name, filename, build, python, abi, platform, uploaded_at
280
+ ) VALUES (?, ?, ?, ?, ?, ?, ? );
281
281
""" ,
282
- (package , filename , whl .build , py , abi , plat ),
282
+ (package , filename , whl .build , py , abi , plat , uploaded_at ),
283
283
)
284
284
285
285
if abi_tags == ["none" ] and platform_tags == ["any" ]:
286
286
continue
287
287
288
288
has_binary_wheel = True
289
289
290
+ # Check if the package has any known vulnerabilities.
291
+ has_vulnerabilities = bool (resp .get ("vulnerabilities" , []))
292
+
290
293
package_downloads = downloads .get (package , 0 )
291
294
with locked_db () as db :
292
295
db .execute (
293
296
"""
294
297
INSERT OR IGNORE INTO packages (
295
- name, version, requires_python, has_binary_wheel, uploaded_at , downloads, scorecard_overall, in_google_assured_oss
296
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?);
298
+ name, version, requires_python, has_binary_wheel, has_vulnerabilities, first_uploaded_at, last_uploaded_at , downloads, scorecard_overall, in_google_assured_oss
299
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ? );
297
300
""" ,
298
301
(
299
302
package ,
300
303
str_version ,
301
304
requires_python ,
302
305
has_binary_wheel ,
303
- uploaded_at ,
306
+ has_vulnerabilities ,
307
+ first_uploaded_at ,
308
+ last_uploaded_at ,
304
309
package_downloads ,
305
310
scorecard_overall ,
306
311
package .lower () in GOOGLE_ASSURED_OSS_PACKAGES
@@ -460,11 +465,10 @@ def get_google_assured_oss_packages(http: urllib3.PoolManager) -> set[str]:
460
465
pypi_deps_db = os .path .join (base_dir , "pypi.db" )
461
466
462
467
downloads = {}
463
- with open (os .path .join (base_dir , "downloads.csv" )) as f :
464
- csv = csv .reader (f )
465
- next (csv )
466
- for project , dls in csv :
467
- downloads [project ] = int (dls )
468
+ resp = http .request ("GET" , DOWNLOADS_URL )
469
+ assert resp .status == 200
470
+ for row in resp .json ()["rows" ]:
471
+ downloads [row ["project" ]] = row ["download_count" ]
468
472
469
473
_DB = sqlite3 .connect (os .path .join (base_dir , "pypi.db" ), check_same_thread = False )
470
474
_DB .execute (
@@ -475,7 +479,9 @@ def get_google_assured_oss_packages(http: urllib3.PoolManager) -> set[str]:
475
479
requires_python TEXT,
476
480
yanked BOOLEAN DEFAULT 0,
477
481
has_binary_wheel BOOLEAN,
478
- uploaded_at TIMESTAMP,
482
+ has_vulnerabilities BOOLEAN,
483
+ first_uploaded_at TIMESTAMP,
484
+ last_uploaded_at TIMESTAMP,
479
485
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
480
486
downloads INTEGER,
481
487
scorecard_overall FLOAT,
@@ -508,6 +514,7 @@ def get_google_assured_oss_packages(http: urllib3.PoolManager) -> set[str]:
508
514
python TEXT,
509
515
abi TEXT,
510
516
platform TEXT,
517
+ uploaded_at TIMESTAMP,
511
518
FOREIGN KEY (package_name) REFERENCES packages(name)
512
519
);
513
520
"""
0 commit comments