-
Notifications
You must be signed in to change notification settings - Fork 0
Enhance downloader functionality and version bump to v0.6.1 #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,20 @@ | |
/** Valid USC title numbers (1-54) */
export const USC_TITLE_NUMBERS = Array.from({ length: 54 }, (_, i) => i + 1);

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Check whether a list of title numbers is exactly the full set of USC titles.
 *
 * Ordering and duplicates are ignored. The result is `false` when any entry of
 * `USC_TITLE_NUMBERS` is missing, and also when the list contains a number
 * that is not in `USC_TITLE_NUMBERS` (the distinct count no longer matches).
 *
 * @param titles - Title numbers to check; the array is not modified.
 * @returns `true` when the distinct values of `titles` are exactly the values
 *   in `USC_TITLE_NUMBERS`.
 */
export function isAllTitles(titles: readonly number[]): boolean {
  const unique = new Set(titles);
  // Compare against the constant's length rather than a literal so the check
  // cannot drift from USC_TITLE_NUMBERS. Equal size plus full coverage means
  // the two sets are identical.
  return (
    unique.size === USC_TITLE_NUMBERS.length &&
    USC_TITLE_NUMBERS.every((n) => unique.has(n))
  );
}
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Public API | ||
| // --------------------------------------------------------------------------- | ||
|
|
@@ -78,13 +92,27 @@ | |
|
|
||
| /** | ||
| * Download USC title XML files from OLRC. | ||
| * | ||
| * When all 54 titles are requested, uses the bulk `uscAll` zip for a single | ||
| * HTTP round-trip instead of 54 individual requests. Falls back to per-title | ||
| * downloads if the bulk download fails. | ||
| */ | ||
| export async function downloadTitles(options: DownloadOptions): Promise<DownloadResult> { | ||
| const releasePoint = options.releasePoint ?? CURRENT_RELEASE_POINT; | ||
| const titles = options.titles ?? USC_TITLE_NUMBERS; | ||
|
|
||
| await mkdir(options.outputDir, { recursive: true }); | ||
|
|
||
| // Use bulk zip when all 54 titles are requested | ||
| if (options.titles === undefined || isAllTitles(titles)) { | ||
| try { | ||
| const files = await downloadAndExtractAllTitles(releasePoint, options.outputDir); | ||
| return { releasePoint, files, errors: [] }; | ||
| } catch { | ||
| // Fall back to per-title downloads | ||
| } | ||
|
Comment on lines
+106
to
+113
|
||
| } | ||
|
Comment on lines
+106
to
+114
|
||
|
|
||
| const files: DownloadedFile[] = []; | ||
| const errors: DownloadError[] = []; | ||
|
|
||
|
|
@@ -265,3 +293,121 @@ | |
| }); | ||
| }); | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Bulk download (all titles in one zip) | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
/**
 * Regex matching USC XML filenames like usc01.xml, usc54.xml, optionally
 * preceded by a directory path. Capture group 1 is the two-digit title number.
 */
const USC_XML_RE = /^(?:.*\/)?usc(\d{2})\.xml$/;

/**
 * Extract all `usc{NN}.xml` files from a bulk zip archive.
 *
 * Entries are read one at a time: the next entry is requested only after the
 * current one has been skipped or fully extracted. Every matching entry is
 * written to `outputDir` as `usc{NN}.xml`, regardless of the directory it had
 * inside the archive.
 *
 * @param zipPath - Path of the zip archive on disk.
 * @param outputDir - Directory the XML files are written to.
 * @returns An array of `{ titleNumber, filePath }` for each extracted file, in
 *   the order the entries were extracted.
 * @throws Rejects if the archive cannot be opened, if the zip reader emits an
 *   error, or if extracting an entry fails.
 */
function extractAllXmlFromZip(
  zipPath: string,
  outputDir: string,
): Promise<{ titleNumber: number; filePath: string }[]> {
  return new Promise((resolve, reject) => {
    yauzlOpen(zipPath, { lazyEntries: true }, (err, zipFile) => {
      if (err) {
        reject(new Error(`Failed to open zip: ${err.message}`));
        return;
      }
      if (!zipFile) {
        reject(new Error("Failed to open zip: no zipFile returned"));
        return;
      }

      const extracted: { titleNumber: number; filePath: string }[] = [];
      // Number of extractions started but not yet finished.
      let pending = 0;
      // Set once the reader reports that there are no more entries.
      let ended = false;

      // Resolve only when the archive is exhausted AND no extraction is in flight.
      const maybeResolve = (): void => {
        if (ended && pending === 0) {
          resolve(extracted);
        }
      };

      zipFile.on("entry", (entry: Entry) => {
        // Narrow the capture group once instead of asserting non-null at each use.
        const titleDigits = USC_XML_RE.exec(entry.fileName)?.[1];
        if (titleDigits === undefined) {
          // Not a title XML file: skip it and move on to the next entry.
          zipFile.readEntry();
          return;
        }

        const titleNumber = parseInt(titleDigits, 10);
        const filePath = join(outputDir, `usc${titleDigits}.xml`);
        pending++;

        extractEntry(zipFile, entry, filePath)
          .then(() => {
            extracted.push({ titleNumber, filePath });
            pending--;
            // Continue reading entries after extraction completes
            zipFile.readEntry();
            maybeResolve();
          })
          .catch((extractErr: unknown) => {
            // Stop reading the archive and surface the extraction failure as-is.
            zipFile.close();
            reject(extractErr);
          });
      });

      zipFile.on("end", () => {
        ended = true;
        maybeResolve();
      });

      zipFile.on("error", (zipErr: Error) => {
        reject(new Error(`Zip error: ${zipErr.message}`));
      });

      // Kick off reading; each handler above requests the following entry.
      zipFile.readEntry();
    });
  });
}
|
|
||
/**
 * Download the bulk all-titles zip and extract every `usc{NN}.xml` file.
 *
 * The archive is written to `uscAll.zip` inside `outputDir`, extracted into
 * the same directory, and then deleted. If writing the archive or extracting
 * it fails, the (possibly partial) zip is removed on a best-effort basis
 * before the original error is rethrown, so no stray archive is left behind.
 *
 * @param releasePoint - Release point passed to `buildAllTitlesUrl` to build
 *   the download URL.
 * @param outputDir - Directory that receives the temporary zip and the
 *   extracted XML files.
 * @returns One entry per extracted file (title number, path, size in bytes),
 *   sorted by title number.
 * @throws Error if the HTTP response is not OK or has no body; otherwise
 *   rethrows any failure from the stream write, extraction, zip removal, or
 *   `stat` calls.
 */
async function downloadAndExtractAllTitles(
  releasePoint: string,
  outputDir: string,
): Promise<DownloadedFile[]> {
  const url = buildAllTitlesUrl(releasePoint);
  const zipPath = join(outputDir, "uscAll.zip");

  // Download the zip file
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
  }

  if (!response.body) {
    throw new Error(`No response body for ${url}`);
  }

  let extracted: { titleNumber: number; filePath: string }[];
  try {
    // Write zip to disk
    const fileStream = createWriteStream(zipPath);
    await pipeline(Readable.fromWeb(response.body as never), fileStream);

    // Extract all XML files from zip
    extracted = await extractAllXmlFromZip(zipPath, outputDir);
  } catch (err) {
    // Best-effort cleanup of the partial/unusable archive. A failure to
    // delete is ignored here so it cannot mask the error that caused it.
    await unlink(zipPath).catch(() => undefined);
    throw err;
  }

  // Clean up zip file (on the success path a failed delete is still reported)
  await unlink(zipPath);

  // Stat the extracted files concurrently and build results
  const files = await Promise.all(
    extracted.map(async ({ titleNumber, filePath }): Promise<DownloadedFile> => {
      const { size } = await stat(filePath);
      return { titleNumber, filePath, size };
    }),
  );

  // Sort by title number for consistent ordering
  files.sort((a, b) => a.titleNumber - b.titleNumber);

  return files;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This changelog entry attributes the bulk USC downloader enhancement to `@law2md/core`, but the downloader implementation lives in `@law2md/usc`.
To avoid misleading consumers, adjust this entry to reflect the actual core changes in 0.6.1 (e.g., dependency bump / no functional changes) and leave the downloader note to the `usc`/`cli` changelogs.