Skip to content

Commit 272564c

Browse files
committed
Merge remote-tracking branch 'origin/main' into 1983-fix-la-blazor
# Conflicts: # CHANGES.md
2 parents 4113c62 + 7681930 commit 272564c

6 files changed

Lines changed: 6066 additions & 13 deletions

File tree

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ Fixes:
2626
opinions, and the legacy http:// host now returns 521. The scraper now reads
2727
the RSS feed for recent Opinions sub-pages and drives the Blazor SignalR
2828
circuit to retrieve the server-rendered news release. #1983
29+
- `ca9` oral argument audio URLs now point to `cdn.ca9.uscourts.gov`; the old
30+
`www` host started returning 404 pages after the site redesign (#1987)
31+
- `uscfc` was crashing ingestion because some opinions (attorney discipline
32+
orders) now have relative download links. Resolve them with `urljoin`. #1986
2933

3034
## 3.0.21 - 2026-05-29
3135

juriscraper/opinions/united_states/federal_special/uscfc.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import json
1111
import re
12+
from urllib.parse import urljoin
1213

1314
from juriscraper.lib.string_utils import titlecase
1415
from juriscraper.OpinionSiteLinear import OpinionSiteLinear
@@ -68,7 +69,9 @@ def _process_html(self):
6869
status = "Unknown"
6970

7071
parsed_case = {
71-
"url": opinion["link"],
72+
# Some opinions have relative links,
73+
# e.g. "/cofc/opinions/25-11281-parks-....pdf"
74+
"url": urljoin(self.url, opinion["link"]),
7275
"date": opinion["date"],
7376
"other_date": other_date,
7477
"status": status,

juriscraper/oral_args/united_states/federal_appellate/ca9.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
"""Scraper for Ninth Circuit Court of Appeals
22
CourtID: ca9
33
Court Short Name: ca9
4+
History:
5+
- 2026-06-04: Audio files moved from www.ca9.uscourts.gov to
6+
cdn.ca9.uscourts.gov after the site redesign; same migration that
7+
moved the opinion feeds (#1987).
48
"""
59

610
import json
@@ -22,9 +26,15 @@ def __init__(self, *args, **kwargs):
2226

2327
self.court_id = self.__module__
2428
self.table = "media"
25-
self.base_url = "https://www.ca9.uscourts.gov/datastore/media/"
29+
self.base_url = "https://cdn.ca9.uscourts.gov/datastore/media/"
30+
# Recent files are served as "application/octet-stream; charset=UTF-8";
31+
# older files (relevant for backscrapes) as "binary/octet-stream",
32+
# "audio/mpeg" or "audio/x-ms-wma" depending on the year
2633
self.expected_content_types = [
27-
"application/octet-stream; charset=UTF-8"
34+
"application/octet-stream; charset=UTF-8",
35+
"binary/octet-stream",
36+
"audio/mpeg",
37+
"audio/x-ms-wma",
2838
]
2939
# AWS Cognito creds step:
3040
self.headers = {

0 commit comments

Comments
 (0)