Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 67 additions & 22 deletions src/fetchcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,45 +14,90 @@
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from urllib.parse import urljoin

from packageurl import PackageURL

from fetchcode import fetch_json_response


class Pypi:
"""
This class handles Pypi PURLs.
"""
"""Handle PyPI Package URL (PURL) resolution and download URL retrieval."""

purl_pattern = "pkg:pypi/.*"
base_url = "https://pypi.org/pypi/"
base_url = "https://pypi.org/pypi"

@classmethod
def get_download_url(cls, purl):
def get_package_data(cls, purl: str) -> dict:
"""
Return the download URL for a Pypi PURL.
Fetch package data from PyPI API.

If no version is specified in the PURL, fetches the latest version.

Args:
purl: A Package URL string (e.g., "pkg:pypi/[email protected]")

Returns:
The full JSON response from PyPI API.
"""
purl = PackageURL.from_string(purl)
parsed_purl = PackageURL.from_string(purl)

name = purl.name
version = purl.version
if parsed_purl.version:
api_url = f"{cls.base_url}/{parsed_purl.name}/{parsed_purl.version}/json"
else:
api_url = f"{cls.base_url}/{parsed_purl.name}/json"

if not name or not version:
raise ValueError("Pypi PURL must specify a name and version")
return fetch_json_response(api_url)

url = urljoin(cls.base_url, f"{name}/{version}/json")
data = fetch_json_response(url)
@classmethod
def get_urls_info(cls, purl: str) -> list[dict]:
"""
Collect URL info dicts from PyPI API.

download_urls = data.get("urls", [{}])
If no version is specified in the PURL, fetches the latest version.

if not download_urls:
raise ValueError(f"No download URLs found for {name} version {version}")
Returns:
List of URL info dicts from PyPI API, or empty list if none found.
"""
data = cls.get_package_data(purl)
return data.get("urls", [])

download_url = next((url["url"] for url in download_urls if url.get("url")), None)
@classmethod
def get_download_url(cls, purl: str, preferred_type: str = "sdist") -> str | None:
"""
Get a single download URL from PyPI API.

if not download_url:
raise ValueError(f"No download URL found for {name} version {version}")
If no version is specified in the PURL, fetches the latest version.

return download_url
Args:
purl: A Package URL string (e.g., "pkg:pypi/[email protected]")
preferred_type: Preferred package type (e.g., "sdist", "bdist_wheel").
Falls back to first available if preferred type not found.

Returns:
The download URL, or None if not found.
"""
urls_info = cls.get_urls_info(purl)

if not urls_info:
return

for url_info in urls_info:
if url_info.get("packagetype") == preferred_type:
return url_info["url"]

return urls_info[0]["url"]

@classmethod
def get_all_download_urls(cls, purl: str) -> list[str]:
"""
Get all download URLs from PyPI API.

If no version is specified in the PURL, fetches the latest version.

Args:
purl: A Package URL string (e.g., "pkg:pypi/[email protected]")

Returns:
List of all available download URLs.
"""
urls_info = cls.get_urls_info(purl)
return [url_info["url"] for url_info in urls_info if "url" in url_info]
115 changes: 115 additions & 0 deletions tests/data/pypi/asgiref-3.11.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"info": {
"author": "Django Software Foundation",
"author_email": "foundation@djangoproject.com",
"bugtrack_url": null,
"classifiers": [
"Development Status :: 5 - Production/Stable",
"Environment :: Web Environment",
"Intended Audience :: Developers",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.9",
"Topic :: Internet :: WWW/HTTP"
],
"description": "asgiref\n=======\n\n.. image:: https://github.com/django/asgiref/actions/workflows/tests.yml/badge.svg\n :target: https://github.com/django/asgiref/actions/workflows/tests.yml\n\n.. image:: https://img.shields.io/pypi/v/asgiref.svg\n :target: https://pypi.python.org/pypi/asgiref\n\nASGI is a standard for Python asynchronous web apps and servers to communicate\nwith each other, and positioned as an asynchronous successor to WSGI. You can\nread more at https://asgi.readthedocs.io/en/latest/\n\nThis package includes ASGI base libraries, such as:\n\n* Sync-to-async and async-to-sync function wrappers, ``asgiref.sync``\n* Server base classes, ``asgiref.server``\n* A WSGI-to-ASGI adapter, in ``asgiref.wsgi``\n\n\nFunction wrappers\n-----------------\n\nThese allow you to wrap or decorate async or sync functions to call them from\nthe other style (so you can call async functions from a synchronous thread,\nor vice-versa).\n\nIn particular:\n\n* AsyncToSync lets a synchronous subthread stop and wait while the async\n function is called on the main thread's event loop, and then control is\n returned to the thread when the async function is finished.\n\n* SyncToAsync lets async code call a synchronous function, which is run in\n a threadpool and control returned to the async coroutine when the synchronous\n function completes.\n\nThe idea is to make it easier to call synchronous APIs from async code and\nasynchronous APIs from synchronous code so it's easier to transition code from\none style to the other. In the case of Channels, we wrap the (synchronous)\nDjango view system with SyncToAsync to allow it to run inside the (asynchronous)\nASGI server.\n\nNote that exactly what threads things run in is very specific, and aimed to\nkeep maximum compatibility with old synchronous code. See\n\"Synchronous code & Threads\" below for a full explanation. 
By default,\n``sync_to_async`` will run all synchronous code in the program in the same\nthread for safety reasons; you can disable this for more performance with\n``@sync_to_async(thread_sensitive=False)``, but make sure that your code does\nnot rely on anything bound to threads (like database connections) when you do.\n\n\nThreadlocal replacement\n-----------------------\n\nThis is a drop-in replacement for ``threading.local`` that works with both\nthreads and asyncio Tasks. Even better, it will proxy values through from a\ntask-local context to a thread-local context when you use ``sync_to_async``\nto run things in a threadpool, and vice-versa for ``async_to_sync``.\n\nIf you instead want true thread- and task-safety, you can set\n``thread_critical`` on the Local object to ensure this instead.\n\n\nServer base classes\n-------------------\n\nIncludes a ``StatelessServer`` class which provides all the hard work of\nwriting a stateless server (as in, does not handle direct incoming sockets\nbut instead consumes external streams or sockets to work out what is happening).\n\nAn example of such a server would be a chatbot server that connects out to\na central chat server and provides a \"connection scope\" per user chatting to\nit. 
There's only one actual connection, but the server has to separate things\ninto several scopes for easier writing of the code.\n\nYou can see an example of this being used in `frequensgi \u003Chttps://github.com/andrewgodwin/frequensgi\u003E`_.\n\n\nWSGI-to-ASGI adapter\n--------------------\n\nAllows you to wrap a WSGI application so it appears as a valid ASGI application.\n\nSimply wrap it around your WSGI application like so::\n\n asgi_application = WsgiToAsgi(wsgi_application)\n\nThe WSGI application will be run in a synchronous threadpool, and the wrapped\nASGI application will be one that accepts ``http`` class messages.\n\nPlease note that not all extended features of WSGI may be supported (such as\nfile handles for incoming POST bodies).\n\n\nDependencies\n------------\n\n``asgiref`` requires Python 3.9 or higher.\n\n\nContributing\n------------\n\nPlease refer to the\n`main Channels contributing docs \u003Chttps://github.com/django/channels/blob/master/CONTRIBUTING.rst\u003E`_.\n\n\nTesting\n'''''''\n\nTo run tests, make sure you have installed the ``tests`` extra with the package::\n\n cd asgiref/\n pip install -e .[tests]\n pytest\n\n\nBuilding the documentation\n''''''''''''''''''''''''''\n\nThe documentation uses `Sphinx \u003Chttp://www.sphinx-doc.org\u003E`_::\n\n cd asgiref/docs/\n pip install sphinx\n\nTo build the docs, you can use the default tools::\n\n sphinx-build -b html . _build/html # or `make html`, if you've got make set up\n cd _build/html\n python -m http.server\n\n...or you can use ``sphinx-autobuild`` to run a server and rebuild/reload\nyour documentation changes automatically::\n\n pip install sphinx-autobuild\n sphinx-autobuild . 
_build/html\n\n\nReleasing\n'''''''''\n\nTo release, first add details to CHANGELOG.txt and update the version number in ``asgiref/__init__.py``.\n\nThen, build and push the packages::\n\n python -m build\n twine upload dist/*\n rm -r asgiref.egg-info dist\n\n\nImplementation Details\n----------------------\n\nSynchronous code & threads\n''''''''''''''''''''''''''\n\nThe ``asgiref.sync`` module provides two wrappers that let you go between\nasynchronous and synchronous code at will, while taking care of the rough edges\nfor you.\n\nUnfortunately, the rough edges are numerous, and the code has to work especially\nhard to keep things in the same thread as much as possible. Notably, the\nrestrictions we are working with are:\n\n* All synchronous code called through ``SyncToAsync`` and marked with\n ``thread_sensitive`` should run in the same thread as each other (and if the\n outer layer of the program is synchronous, the main thread)\n\n* If a thread already has a running async loop, ``AsyncToSync`` can't run things\n on that loop if it's blocked on synchronous code that is above you in the\n call stack.\n\nThe first compromise you get to might be that ``thread_sensitive`` code should\njust run in the same thread and not spawn in a sub-thread, fulfilling the first\nrestriction, but that immediately runs you into the second restriction.\n\nThe only real solution is to essentially have a variant of ThreadPoolExecutor\nthat executes any ``thread_sensitive`` code on the outermost synchronous\nthread - either the main thread, or a single spawned subthread.\n\nThis means you now have two basic states:\n\n* If the outermost layer of your program is synchronous, then all async code\n run through ``AsyncToSync`` will run in a per-call event loop in arbitrary\n sub-threads, while all ``thread_sensitive`` code will run in the main thread.\n\n* If the outermost layer of your program is asynchronous, then all async code\n runs on the main thread's event loop, and all 
``thread_sensitive`` synchronous\n code will run in a single shared sub-thread.\n\nCrucially, this means that in both cases there is a thread which is a shared\nresource that all ``thread_sensitive`` code must run on, and there is a chance\nthat this thread is currently blocked on its own ``AsyncToSync`` call. Thus,\n``AsyncToSync`` needs to act as an executor for thread code while it's blocking.\n\nThe ``CurrentThreadExecutor`` class provides this functionality; rather than\nsimply waiting on a Future, you can call its ``run_until_future`` method and\nit will run submitted code until that Future is done. This means that code\ninside the call can then run code on your thread.\n\n\nMaintenance and Security\n------------------------\n\nTo report security issues, please contact security@djangoproject.com. For GPG\nsignatures and more security process information, see\nhttps://docs.djangoproject.com/en/dev/internals/security/.\n\nTo report bugs or request new features, please open a new GitHub issue.\n\nThis repository is part of the Channels project. For the shepherd and maintenance team, please see the\n`main Channels readme \u003Chttps://github.com/django/channels/blob/master/README.rst\u003E`_.\n",
"description_content_type": null,
"docs_url": null,
"download_url": null,
"downloads": {
"last_day": -1,
"last_month": -1,
"last_week": -1
},
"dynamic": [
"License-File"
],
"home_page": "https://github.com/django/asgiref/",
"keywords": null,
"license": "BSD-3-Clause",
"license_expression": null,
"license_files": [
"LICENSE"
],
"maintainer": null,
"maintainer_email": null,
"name": "asgiref",
"package_url": "https://pypi.org/project/asgiref/",
"platform": null,
"project_url": "https://pypi.org/project/asgiref/",
"project_urls": {
"Changelog": "https://github.com/django/asgiref/blob/master/CHANGELOG.txt",
"Documentation": "https://asgi.readthedocs.io/",
"Further Documentation": "https://docs.djangoproject.com/en/stable/topics/async/#async-adapter-functions",
"Homepage": "https://github.com/django/asgiref/"
},
"provides_extra": [
"tests"
],
"release_url": "https://pypi.org/project/asgiref/3.11.0/",
"requires_dist": [
"typing_extensions\u003E=4; python_version \u003C \"3.11\"",
"pytest; extra == \"tests\"",
"pytest-asyncio; extra == \"tests\"",
"mypy\u003E=1.14.0; extra == \"tests\""
],
"requires_python": "\u003E=3.9",
"summary": "ASGI specs, helper code, and adapters",
"version": "3.11.0",
"yanked": false,
"yanked_reason": null
},
"last_serial": 32495228,
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "91be317c2c55b8bbec407257d45f5c8d1b6867abc76d12043f2d3d58c538a4ea",
"md5": "659fe6bbd7e43c8bde0b7ca065be0f6c",
"sha256": "1db9021efadb0d9512ce8ffaf72fcef601c7b73a8807a1bb2ef143dc6b14846d"
},
"downloads": -1,
"filename": "asgiref-3.11.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "659fe6bbd7e43c8bde0b7ca065be0f6c",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "\u003E=3.9",
"size": 24096,
"upload_time": "2025-11-19T15:32:19",
"upload_time_iso_8601": "2025-11-19T15:32:19.004742Z",
"url": "https://files.pythonhosted.org/packages/91/be/317c2c55b8bbec407257d45f5c8d1b6867abc76d12043f2d3d58c538a4ea/asgiref-3.11.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "76b94db2509eabd14b4a8c71d1b24c8d5734c52b8560a7b1e1a8b56c8d25568b",
"md5": "554794453502d266a90d2254fcb1f7c3",
"sha256": "13acff32519542a1736223fb79a715acdebe24286d98e8b164a73085f40da2c4"
},
"downloads": -1,
"filename": "asgiref-3.11.0.tar.gz",
"has_sig": false,
"md5_digest": "554794453502d266a90d2254fcb1f7c3",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "\u003E=3.9",
"size": 37969,
"upload_time": "2025-11-19T15:32:20",
"upload_time_iso_8601": "2025-11-19T15:32:20.106038Z",
"url": "https://files.pythonhosted.org/packages/76/b9/4db2509eabd14b4a8c71d1b24c8d5734c52b8560a7b1e1a8b56c8d25568b/asgiref-3.11.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"vulnerabilities": []
}
Loading