Skip to content

Commit 2caa126

Browse files
committed
Rewrote Windows Terminal file download endpoints to parse GitHub repo directly
1 parent 1152ab2 commit 2caa126

File tree

1 file changed

+92
-184
lines changed

1 file changed

+92
-184
lines changed

src/murfey/server/api/bootstrap.py

Lines changed: 92 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from __future__ import annotations
1616

1717
import functools
18-
import html
1918
import logging
2019
import random
2120
import re
@@ -572,105 +571,74 @@ def get_msys2_package_file(
572571
=======================================================================================
573572
"""
574573

575-
windows_terminal_url = "https://api.github.com/repos/microsoft/terminal/releases"
574+
# Base URL of the public (HTML) GitHub releases page for Windows Terminal. The
# paginated release listing ("?page=N"), the per-version asset pages
# ("/expanded_assets/{version}") and the file downloads
# ("/download/{version}/{file_name}") are all addressed relative to it.
windows_terminal_url = "https://github.com/microsoft/terminal/releases"
576575

577576

def get_number_of_github_pages(url: str, timeout: float = 30.0) -> int:
    """
    Parses the main GitHub releases page to find the number of pages present in the
    repository.

    The count is read from the pagination links in the returned HTML, which are
    recognised by their 'aria-label="Page N"' attribute. The largest N found is
    returned.

    Args:
        url: Address of the HTML releases page to inspect.
        timeout: Seconds to wait for the server to respond before giving up.

    Returns:
        The highest page number advertised on the page.

    Raises:
        HTTPException: If the request is unsuccessful, the response is not HTML,
            or no page numbers can be found in it.
    """

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Unable to load {url}",
        )

    # Use .get() so that a missing header is reported as a bad upstream response
    # instead of surfacing as a KeyError
    content_type = response.headers.get("content-type", "")
    if not content_type.startswith("text/html"):
        # HTTPException takes the status code first; the message belongs in 'detail'
        raise HTTPException(
            status_code=502,
            detail="Unable to parse non-HTML page for page numbers",
        )

    # Find the number of pages present in this release
    page_numbers = [
        int(number)
        for number in re.findall(r'aria-label="Page ([0-9]+)"', response.text)
    ]
    if not page_numbers:
        raise HTTPException(status_code=502, detail="No page numbers found")
    return max(page_numbers)
661597

662598

@windows_terminal.get("/releases", response_class=Response)
def get_windows_terminal_releases(request: Request):
    """
    Returns a list of stable Windows Terminal releases from the GitHub repository.

    Every page of the GitHub releases listing is downloaded and searched for
    version tags. Versions labelled as pre-releases and the older "Color Tool"
    packages are left out. The result is an HTML page with one hyperlink per
    remaining version, each pointing at this endpoint's own path with the version
    appended.

    Raises:
        HTTPException: If a page of the listing cannot be loaded successfully or
            is not HTML.
    """

    num_pages = get_number_of_github_pages(windows_terminal_url)

    # RegEx patterns to parse HTML file with
    # https://github.com/{owner}/{repo}/releases/expanded_assets/{version} leads to a
    # HTML page with the assets for that particular version
    release_pattern = re.compile(
        # Escape the URL so that its '.' characters are matched literally
        r'src="' + re.escape(windows_terminal_url) + r'/expanded_assets/([v0-9\.]+)"'
    )
    # Pre-release label follows after link to version tag
    prerelease_pattern = re.compile(
        r'[\s]*<span data-view-component="true" class="f1 text-bold d-inline mr-3"><a href="/microsoft/terminal/releases/tag/([\w\.]+)" data-view-component="true" class="Link--primary Link">[\w\s\.\-]+</a></span>'
        r"[\s]*<span>"
        r'[\s]*<span data-view-component="true" class="Label Label--warning Label--large v-align-text-bottom d-none d-md-inline-block">Pre-release</span>'
    )
    # Older packages in the repo are named "Color Tools"; omit them
    colortool_pattern = re.compile(
        r'<span data-view-component="true" class="f1 text-bold d-inline mr-3"><a href="/microsoft/terminal/releases/tag/([\w\.]+)" data-view-component="true" class="Link--primary Link">Color Tool[\w\s]+</a></span>'
    )

    # Iterate through repository pages
    versions: list[str] = []
    for page in range(1, num_pages + 1):  # Pagination starts from 1
        response = requests.get(f"{windows_terminal_url}?page={page}", timeout=30)
        # Fail on the page that went wrong rather than reporting whatever status
        # the last page happened to have
        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Unable to load page {page} of the releases",
            )
        if not response.headers.get("content-type", "").startswith("text/html"):
            # HTTPException takes the status code first; the message belongs in
            # 'detail'
            raise HTTPException(
                status_code=502,
                detail="Unable to parse non-HTML page for package versions",
            )
        text = response.text

        # Collect only stable releases
        excluded = set(prerelease_pattern.findall(text))
        excluded.update(colortool_pattern.findall(text))
        # dict.fromkeys() removes duplicates while keeping the order in which the
        # versions appear on the page, so the listing is stable between requests
        versions.extend(
            version
            for version in dict.fromkeys(release_pattern.findall(text))
            if version not in excluded
        )

    # Construct HTML document for available versions
    html_head = "\n".join(
        (
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            " <title>Links for Windows Terminal</title>",
            "</head>",
            "<body>",
            " <h1>Links for Windows Terminal</h1>",
        )
    )
    # Construct hyperlinks
    base_url = str(request.base_url).strip("/")  # Remove trailing '/'
    path = request.url.path.strip("/")  # Remove leading '/'
    hyperlinks = "\n".join(
        f'<a href="{base_url}/{path}/{version}">{version}</a><br />'
        for version in versions
    )
    html_tail = "\n".join(
        (
            "</body>",
            "</html>",
        )
    )

    # Combine
    content = "\n".join((html_head, hyperlinks, html_tail))

    # Return FastAPI response
    return Response(
        content=content.encode("utf-8"),
        status_code=200,  # Every upstream page was verified to be 200 above
        media_type="text/html",
    )
713680

714681

@windows_terminal.get("/releases/{version}", response_class=Response)
def get_windows_terminal_version_assets(
    version: str,
    request: Request,
):
    """
    Returns a list of packages for the selected version of Windows Terminal.

    The asset page GitHub serves for the requested version is downloaded and
    searched for download links. The result is an HTML page with one hyperlink
    per file, each pointing at this endpoint's own path with the file name
    appended.

    Args:
        version: Release tag taken from the request path.
        request: The incoming request, used to build the hyperlinks.

    Raises:
        HTTPException: If the asset page cannot be loaded successfully or is not
            HTML.
    """
    # Imported locally because the module no longer imports it at file level
    import html

    # https://github.com/{owner}/{repo}/releases/expanded_assets/{version}
    url = windows_terminal_url + f"/expanded_assets/{version}"

    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Unable to load assets for {version}",
        )
    if not response.headers.get("content-type", "").startswith("text/html"):
        # HTTPException takes the status code first; the message belongs in 'detail'
        raise HTTPException(
            status_code=502,
            detail="Unable to parse non-HTML page for package assets",
        )
    text = response.text

    # Find hyperlinks
    # 'version' comes straight from the request path, so escape it before it
    # becomes part of the pattern; otherwise its '.' would match any character
    # and other metacharacters could alter or break the expression
    pattern = (
        r'href="[/\w\.]+/releases/download/' + re.escape(version) + r'/([\w\.\-]+)"'
    )
    assets = re.findall(pattern, text)

    # Construct HTML document for available assets
    html_head = "\n".join(
        (
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            " <title>Links for Windows Terminal</title>",
            "</head>",
            "<body>",
            " <h1>Links for Windows Terminal</h1>",
        )
    )
    # Construct hyperlinks
    base_url = str(request.base_url).strip("/")  # Remove trailing '/'
    path = request.url.path.strip("/")  # Remove leading '/'
    # The path contains the caller-supplied version, so escape it before writing
    # it into an HTML attribute
    link_root = html.escape(f"{base_url}/{path}", quote=True)
    hyperlinks = "\n".join(
        f'<a href="{link_root}/{asset}">{asset}</a><br />' for asset in assets
    )
    html_tail = "\n".join(
        (
            "</body>",
            "</html>",
        )
    )

    # Combine
    content = "\n".join((html_head, hyperlinks, html_tail))

    # Return FastAPI response
    return Response(
        content=content.encode("utf-8"),
        status_code=200,  # The upstream response was verified to be 200 above
        media_type="text/html",
    )
822743

@@ -826,30 +747,17 @@ def get_windows_terminal_package_file(
826747
version: str,
827748
file_name: str,
828749
):
829-
# Search for the package
830-
versions = get_github_release_versions(windows_terminal_url)
831-
asset_url = versions.get(version)
832-
if asset_url is None:
833-
raise HTTPException(
834-
status_code=500, detail=f"Unable to load assets for {version}"
835-
)
836-
assets = get_github_version_assets(asset_url)
837-
file_url = assets.get(file_name)
838-
if file_url is None:
839-
raise HTTPException(
840-
status_code=500, detail=f"No download link associated with {file_name}"
841-
)
842-
843-
# Get HTTP response
844-
response = requests.get(
845-
file_url,
846-
)
750+
"""
751+
Returns a package from the GitHub repository.
752+
"""
847753

754+
url = windows_terminal_url + f"/download/{version}/{file_name}"
755+
response = requests.get(url)
848756
if response.status_code == 200:
849757
return Response(
850758
content=response.content,
851-
media_type=response.headers.get("content-type"),
852759
status_code=response.status_code,
760+
headers=response.headers,
853761
)
854762
else:
855763
raise HTTPException(status_code=response.status_code)

0 commit comments

Comments
 (0)