@@ -69,15 +69,24 @@ export interface BaseEcfrConvertOptions {
6969
7070/** Single-granularity mode: one output directory, one granularity. */
7171export interface SingleEcfrConvertOptions extends BaseEcfrConvertOptions {
72- /** Output root directory */
72+ /** Output root directory. Required in single-granularity mode. */
7373 output : string ;
74- /** Output granularity. Defaults to "section" when omitted. */
74+ /** Output granularity. Defaults to `"section"` when omitted. */
7575 granularity ?: EcfrGranularity | undefined ;
7676 /** @internal — must not be set in single-granularity mode */
7777 granularities ?: undefined ;
7878}
7979
80- /** Multi-granularity mode: a set of `{granularity, output}` pairs emitted from one parse. */
80+ /**
81+ * Multi-granularity mode: a set of `{granularity, output}` pairs emitted from
82+ * one parse.
83+ *
84+ * The builder emits at the set of unique `LevelType`s needed to satisfy the
85+ * requested granularities. `section` and `chapter` both emit at the section
86+ * level — chapter output is synthesized from the section bucket at write
87+ * time (by grouping sections under their chapter ancestor). `part` and
88+ * `title` each emit at their own level.
89+ */
8190export interface MultiEcfrConvertOptions extends BaseEcfrConvertOptions {
8291 /** Multiple `{granularity, output}` pairs to produce in a single parse. */
8392 granularities : readonly EcfrGranularityOutput [ ] ;
@@ -259,14 +268,18 @@ export async function convertEcfrTitle(
259268 return first ;
260269}
261270
262- /** Extract title number and name from the first available collected node. */
271+ /**
272+ * Extract title number and name from the first available collected node.
273+ *
274+ * Falls back to `{ titleNumber: "0", titleName: "" }` and logs a warning when
275+ * no emitted node has a title ancestor and no title-level node was emitted.
276+ * That path produces `/us/cfr/t0/...` canonical identifiers, almost always a
277+ * sign of malformed source XML. (A title missing `numValue` also yields "0", unwarned.)
278+ */
263279function extractTitleInfo ( collectedByLevel : Map < LevelType , CollectedSection [ ] > ) : {
264280 titleNumber : string ;
265281 titleName : string ;
266282} {
267- let titleNumber = "0" ;
268- let titleName = "" ;
269-
270283 // Prefer section emissions (richest ancestor chain), fall back to others.
271284 const probeOrder : LevelType [ ] = [ "section" , "part" , "chapter" , "title" ] ;
272285 for ( const lt of probeOrder ) {
@@ -275,18 +288,24 @@ function extractTitleInfo(collectedByLevel: Map<LevelType, CollectedSection[]>):
275288 if ( ! first ) continue ;
276289 const titleAncestor = first . context . ancestors . find ( ( a ) => a . levelType === "title" ) ;
277290 if ( titleAncestor ) {
278- titleNumber = titleAncestor . numValue ?? "0" ;
279- titleName = titleAncestor . heading ?? first . context . documentMeta . dcTitle ?? "" ;
280- return { titleNumber, titleName } ;
291+ return {
292+ titleNumber : titleAncestor . numValue ?? "0" ,
293+ titleName : titleAncestor . heading ?? first . context . documentMeta . dcTitle ?? "" ,
294+ } ;
281295 }
282296 if ( first . node . levelType === "title" ) {
283- titleNumber = first . node . numValue ?? "0" ;
284- titleName = first . node . heading ?? first . context . documentMeta . dcTitle ?? "" ;
285- return { titleNumber, titleName } ;
297+ return {
298+ titleNumber : first . node . numValue ?? "0" ,
299+ titleName : first . node . heading ?? first . context . documentMeta . dcTitle ?? "" ,
300+ } ;
286301 }
287302 }
288303
289- return { titleNumber, titleName } ;
304+ console . warn (
305+ "[@lexbuild/ecfr] convertEcfrTitle: could not resolve title number from emitted nodes; " +
306+ "output will use `/us/cfr/t0/...` identifiers. Source XML likely missing a DIV1 TYPE=\"TITLE\"." ,
307+ ) ;
308+ return { titleNumber : "0" , titleName : "" } ;
290309}
291310
292311interface WriteGranularityArgs {
@@ -339,20 +358,32 @@ async function writeGranularity(args: WriteGranularityArgs): Promise<EcfrConvert
339358 { sections : CollectedSection [ ] ; chapterAncestor : AncestorInfo ; firstContext : EmitContext }
340359 > ( ) ;
341360
361+ let skippedRootless = 0 ;
342362 for ( const item of collected ) {
343363 const chapterAnc = item . context . ancestors . find ( ( a ) => a . levelType === "chapter" ) ;
344- const chapterKey = chapterAnc ?. numValue ?? "__root__" ;
345- const existing = chapterMap . get ( chapterKey ) ;
364+ if ( ! chapterAnc ?. numValue ) {
365+ // Section without a numbered chapter ancestor cannot be placed in a
366+ // chapter file. Rare in eCFR (e.g. parts directly under subtitle with no
367+ // surrounding chapter). Drop rather than synthesize a junk filename.
368+ skippedRootless ++ ;
369+ continue ;
370+ }
371+ const existing = chapterMap . get ( chapterAnc . numValue ) ;
346372 if ( existing ) {
347373 existing . sections . push ( item ) ;
348374 } else {
349- chapterMap . set ( chapterKey , {
375+ chapterMap . set ( chapterAnc . numValue , {
350376 sections : [ item ] ,
351- chapterAncestor : chapterAnc ?? { levelType : "chapter" , numValue : chapterKey } ,
377+ chapterAncestor : chapterAnc ,
352378 firstContext : item . context ,
353379 } ) ;
354380 }
355381 }
382+ if ( skippedRootless > 0 ) {
383+ console . warn (
384+ `[@lexbuild/ecfr] convertEcfrTitle: chapter granularity skipped ${ skippedRootless } section(s) with no chapter ancestor` ,
385+ ) ;
386+ }
356387
357388 for ( const [ _chapterKey , { sections, chapterAncestor, firstContext } ] of chapterMap ) {
358389 const chapterNode : LevelNode = {
@@ -555,11 +586,13 @@ function buildDryRunResult(
555586 let count : number ;
556587
557588 if ( granularity === "chapter" ) {
589+ // Mirror the write-phase filter: sections with no chapter ancestor
590+ // would be dropped rather than grouped under a synthetic key.
558591 const chapterKeys = new Set < string > ( ) ;
559592 for ( const { node, context } of collected ) {
560593 const chapterAnc = context . ancestors . find ( ( a ) => a . levelType === "chapter" ) ;
561- const key = chapterAnc ?. numValue ?? "__root__" ;
562- chapterKeys . add ( key ) ;
594+ if ( ! chapterAnc ?. numValue ) continue ;
595+ chapterKeys . add ( chapterAnc . numValue ) ;
563596 totalEstimate += estimateTokens ( node ) ;
564597 }
565598 count = chapterKeys . size ;
0 commit comments