@@ -159,10 +159,17 @@ static function ( string $p ): bool {
159159 foreach ( $ items as $ itemName ) {
160160 $ title = \Title::newFromText ( $ itemName );
161161 if ( $ title !== null && $ title ->getNamespace () === NS_CATEGORY ) {
162+ wfDebugLog ( 'PandocUltimateConverter ' ,
163+ 'handleExportRequest: resolving category " ' . $ itemName . '" ' );
162164 $ visited = [];
163165 $ categoryPages = $ this ->getCategoryPages ( $ title ->getText (), $ visited );
166+ wfDebugLog ( 'PandocUltimateConverter ' ,
167+ 'handleExportRequest: category " ' . $ itemName
168+ . '" resolved to ' . count ( $ categoryPages ) . ' pages ' );
164169 $ pages = array_merge ( $ pages , $ categoryPages );
165170 } else {
171+ wfDebugLog ( 'PandocUltimateConverter ' ,
172+ 'handleExportRequest: direct page " ' . $ itemName . '" ' );
166173 $ pages [] = $ itemName ;
167174 }
168175 }
@@ -318,25 +325,41 @@ private function getCategoryPages( string $categoryName, array &$visited ): arra
318325
319326 $ dbKey = $ title ->getDBkey ();
320327 if ( in_array ( $ dbKey , $ visited , true ) ) {
321- // Cycle detected — stop recursion.
328+ wfDebugLog ( 'PandocUltimateConverter ' ,
329+ 'getCategoryPages: cycle detected for " ' . $ categoryName . '" (dbKey= ' . $ dbKey . '), skipping ' );
322330 return [];
323331 }
324332 $ visited [] = $ dbKey ;
325333
326- $ dbr = $ this ->mwServices ->getDBLoadBalancer ()->getConnection ( DB_REPLICA );
334+ wfDebugLog ( 'PandocUltimateConverter ' ,
335+ 'getCategoryPages: querying members of category " ' . $ categoryName . '" (dbKey= ' . $ dbKey . ') ' );
336+
337+ $ dbr = $ this ->mwServices ->getConnectionProvider ()->getReplicaDatabase ();
327338 $ pages = [];
328339
329- // Fetch all members of this category.
330- $ res = $ dbr ->newSelectQueryBuilder ()
340+ // MW 1.44+ removed cl_to from categorylinks; must JOIN the linktarget table (cl_target_id = lt_id).
341+ // MW 1.42–1.43 still has cl_to so we can filter directly.
342+ $ mwVersion = defined ( 'MW_VERSION ' ) ? MW_VERSION : '0 ' ;
343+ $ qb = $ dbr ->newSelectQueryBuilder ()
331344 ->select ( [ 'cl_from ' ] )
332- ->from ( 'categorylinks ' )
333- ->where ( [ 'cl_to ' => $ dbKey ] )
334- ->caller ( __METHOD__ )
345+ ->from ( 'categorylinks ' );
346+
347+ if ( version_compare ( $ mwVersion , '1.44 ' , '>= ' ) ) {
348+ $ qb ->join ( 'linktarget ' , null , 'cl_target_id = lt_id ' )
349+ ->where ( [ 'lt_title ' => $ dbKey ] );
350+ } else {
351+ $ qb ->where ( [ 'cl_to ' => $ dbKey ] );
352+ }
353+
354+ $ res = $ qb ->caller ( __METHOD__ )
335355 ->fetchResultSet ();
336356
337357 foreach ( $ res as $ row ) {
338358 $ memberTitle = \Title::newFromID ( (int )$ row ->cl_from );
339359 if ( $ memberTitle === null ) {
360+ wfDebugLog ( 'PandocUltimateConverter ' ,
361+ 'getCategoryPages: cl_from= ' . $ row ->cl_from
362+ . ' resolved to NULL title (deleted page?) in category " ' . $ categoryName . '" ' );
340363 continue ;
341364 }
342365
@@ -455,12 +478,15 @@ private function runExport( array $pages, string $format, string $workDir ): str
455478 // so that {{TemplateName}} and {{#if:…}} are resolved before Pandoc sees them.
456479 $ title = \Title::newFromText ( $ pageName );
457480 $ wikitext = $ parser ->preprocess ( $ wikitext , $ title , $ parserOptions );
458-
481+
459482 $ wikitexts [] = $ wikitext ;
460483 $ this ->gatherImages ( $ wikitext , $ mediaDir );
461484 }
462485
463486 $ combinedWikitext = self ::buildCombinedWikitext ( $ pages , $ wikitexts );
487+ wfDebugLog ( 'PandocUltimateConverter ' ,
488+ 'runExport: combinedWikitext= ' . strlen ( $ combinedWikitext ) . ' bytes for '
489+ . count ( $ pages ) . ' page(s) ' );
464490
465491 // Write wikitext to a temp file for pandoc to read.
466492 $ inputFile = $ workDir . DIRECTORY_SEPARATOR . 'input.mediawiki ' ;
@@ -498,6 +524,8 @@ private function runExport( array $pages, string $format, string $workDir ): str
498524
499525 $ cmd [] = $ inputFile ;
500526
527+ wfDebugLog ( 'PandocUltimateConverter ' ,
528+ 'runExport: pandoc command: ' . implode ( ' ' , $ cmd ) );
501529 PandocWrapper::invokePandoc ( $ cmd );
502530
503531 return $ outputFile ;
@@ -628,13 +656,21 @@ private function exportPdfViaLibreOffice(
628656 private function getPageWikitext ( string $ pageName ): string {
629657 $ title = \Title::newFromText ( $ pageName );
630658 if ( $ title === null || !$ title ->exists () ) {
659+ wfDebugLog ( 'PandocUltimateConverter ' ,
660+ 'getPageWikitext: page NOT FOUND " ' . $ pageName . '" '
661+ . ' (title= ' . ( $ title ? $ title ->getPrefixedText () : 'null ' )
662+ . ', exists= ' . ( $ title && $ title ->exists () ? 'yes ' : 'no ' ) . ') ' );
631663 throw new \RuntimeException ( "Page not found: $ pageName " );
632664 }
633665
634666 $ page = $ this ->mwServices ->getWikiPageFactory ()->newFromTitle ( $ title );
635667 $ content = $ page ->getContent ();
636668
637669 if ( !( $ content instanceof \WikitextContent ) ) {
670+ $ contentModel = $ content ? $ content ->getModel () : 'null ' ;
671+ wfDebugLog ( 'PandocUltimateConverter ' ,
672+ 'getPageWikitext: page " ' . $ pageName . '" has non-wikitext content '
673+ . ' (model= ' . $ contentModel . ') ' );
638674 throw new \RuntimeException ( "Page ' $ pageName' does not contain wikitext. " );
639675 }
640676
@@ -662,10 +698,15 @@ private function gatherImages( string $wikitext, string $mediaDir ): void {
662698 // Use the static helper to extract candidate file-link targets (those with a ":").
663699 // Title::newFromText() then does the authoritative namespace validation.
664700 $ candidates = self ::extractWikilinkTargets ( $ wikitext );
701+ wfDebugLog ( 'PandocUltimateConverter ' ,
702+ 'gatherImages: found ' . count ( $ candidates ) . ' link candidates: '
703+ . json_encode ( array_slice ( $ candidates , 0 , 20 ) ) );
665704
666705 foreach ( $ candidates as $ rawLink ) {
667706 $ title = \Title::newFromText ( $ rawLink );
668707 if ( $ title === null ) {
708+ wfDebugLog ( 'PandocUltimateConverter ' ,
709+ 'gatherImages: invalid title for link " ' . $ rawLink . '" ' );
669710 continue ;
670711 }
671712
0 commit comments