Skip to content

Commit f9a8b79

Browse files
authored
Merge pull request #40 from eDavidT/FixExportDatabaseQuery
Fix join syntax in SpecialPandocExport.php
2 parents f1221d9 + cb84485 commit f9a8b79

File tree

1 file changed

+49
-8
lines changed

1 file changed

+49
-8
lines changed

includes/SpecialPages/SpecialPandocExport.php

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,17 @@ static function ( string $p ): bool {
159159
foreach ( $items as $itemName ) {
160160
$title = \Title::newFromText( $itemName );
161161
if ( $title !== null && $title->getNamespace() === NS_CATEGORY ) {
162+
wfDebugLog( 'PandocUltimateConverter',
163+
'handleExportRequest: resolving category "' . $itemName . '"' );
162164
$visited = [];
163165
$categoryPages = $this->getCategoryPages( $title->getText(), $visited );
166+
wfDebugLog( 'PandocUltimateConverter',
167+
'handleExportRequest: category "' . $itemName
168+
. '" resolved to ' . count( $categoryPages ) . ' pages');
164169
$pages = array_merge( $pages, $categoryPages );
165170
} else {
171+
wfDebugLog( 'PandocUltimateConverter',
172+
'handleExportRequest: direct page "' . $itemName . '"' );
166173
$pages[] = $itemName;
167174
}
168175
}
@@ -318,25 +325,41 @@ private function getCategoryPages( string $categoryName, array &$visited ): arra
318325

319326
$dbKey = $title->getDBkey();
320327
if ( in_array( $dbKey, $visited, true ) ) {
321-
// Cycle detected — stop recursion.
328+
wfDebugLog( 'PandocUltimateConverter',
329+
'getCategoryPages: cycle detected for "' . $categoryName . '" (dbKey=' . $dbKey . '), skipping' );
322330
return [];
323331
}
324332
$visited[] = $dbKey;
325333

326-
$dbr = $this->mwServices->getDBLoadBalancer()->getConnection( DB_REPLICA );
334+
wfDebugLog( 'PandocUltimateConverter',
335+
'getCategoryPages: querying members of category "' . $categoryName . '" (dbKey=' . $dbKey . ')' );
336+
337+
$dbr = $this->mwServices->getConnectionProvider()->getReplicaDatabase();
327338
$pages = [];
328339

329-
// Fetch all members of this category.
330-
$res = $dbr->newSelectQueryBuilder()
340+
// MW 1.44+ removed cl_to from categorylinks; must JOIN category table.
341+
// MW 1.42–1.43 still has cl_to so we can filter directly.
342+
$mwVersion = defined( 'MW_VERSION' ) ? MW_VERSION : '0';
343+
$qb = $dbr->newSelectQueryBuilder()
331344
->select( [ 'cl_from' ] )
332-
->from( 'categorylinks' )
333-
->where( [ 'cl_to' => $dbKey ] )
334-
->caller( __METHOD__ )
345+
->from( 'categorylinks' );
346+
347+
if ( version_compare( $mwVersion, '1.44', '>=' ) ) {
348+
$qb->join( 'linktarget', null, 'cl_target_id = lt_id' )
349+
->where( [ 'lt_title' => $dbKey ] );
350+
} else {
351+
$qb->where( [ 'cl_to' => $dbKey ] );
352+
}
353+
354+
$res = $qb->caller( __METHOD__ )
335355
->fetchResultSet();
336356

337357
foreach ( $res as $row ) {
338358
$memberTitle = \Title::newFromID( (int)$row->cl_from );
339359
if ( $memberTitle === null ) {
360+
wfDebugLog( 'PandocUltimateConverter',
361+
'getCategoryPages: cl_from=' . $row->cl_from
362+
. ' resolved to NULL title (deleted page?) in category "' . $categoryName . '"' );
340363
continue;
341364
}
342365

@@ -455,12 +478,15 @@ private function runExport( array $pages, string $format, string $workDir ): str
455478
// so that {{TemplateName}} and {{#if:…}} are resolved before Pandoc sees them.
456479
$title = \Title::newFromText( $pageName );
457480
$wikitext = $parser->preprocess( $wikitext, $title, $parserOptions );
458-
481+
459482
$wikitexts[] = $wikitext;
460483
$this->gatherImages( $wikitext, $mediaDir );
461484
}
462485

463486
$combinedWikitext = self::buildCombinedWikitext( $pages, $wikitexts );
487+
wfDebugLog( 'PandocUltimateConverter',
488+
'runExport: combinedWikitext=' . strlen( $combinedWikitext ) . ' bytes for '
489+
. count( $pages ) . ' page(s)' );
464490

465491
// Write wikitext to a temp file for pandoc to read.
466492
$inputFile = $workDir . DIRECTORY_SEPARATOR . 'input.mediawiki';
@@ -498,6 +524,8 @@ private function runExport( array $pages, string $format, string $workDir ): str
498524

499525
$cmd[] = $inputFile;
500526

527+
wfDebugLog( 'PandocUltimateConverter',
528+
'runExport: pandoc command: ' . implode( ' ', $cmd ) );
501529
PandocWrapper::invokePandoc( $cmd );
502530

503531
return $outputFile;
@@ -628,13 +656,21 @@ private function exportPdfViaLibreOffice(
628656
private function getPageWikitext( string $pageName ): string {
629657
$title = \Title::newFromText( $pageName );
630658
if ( $title === null || !$title->exists() ) {
659+
wfDebugLog( 'PandocUltimateConverter',
660+
'getPageWikitext: page NOT FOUND "' . $pageName . '"'
661+
. ' (title=' . ( $title ? $title->getPrefixedText() : 'null' )
662+
. ', exists=' . ( $title && $title->exists() ? 'yes' : 'no' ) . ')' );
631663
throw new \RuntimeException( "Page not found: $pageName" );
632664
}
633665

634666
$page = $this->mwServices->getWikiPageFactory()->newFromTitle( $title );
635667
$content = $page->getContent();
636668

637669
if ( !( $content instanceof \WikitextContent ) ) {
670+
$contentModel = $content ? $content->getModel() : 'null';
671+
wfDebugLog( 'PandocUltimateConverter',
672+
'getPageWikitext: page "' . $pageName . '" has non-wikitext content'
673+
. ' (model=' . $contentModel . ')' );
638674
throw new \RuntimeException( "Page '$pageName' does not contain wikitext." );
639675
}
640676

@@ -662,10 +698,15 @@ private function gatherImages( string $wikitext, string $mediaDir ): void {
662698
// Use the static helper to extract candidate file-link targets (those with a ":").
663699
// Title::newFromText() then does the authoritative namespace validation.
664700
$candidates = self::extractWikilinkTargets( $wikitext );
701+
wfDebugLog( 'PandocUltimateConverter',
702+
'gatherImages: found ' . count( $candidates ) . ' link candidates: '
703+
. json_encode( array_slice( $candidates, 0, 20 ) ) );
665704

666705
foreach ( $candidates as $rawLink ) {
667706
$title = \Title::newFromText( $rawLink );
668707
if ( $title === null ) {
708+
wfDebugLog( 'PandocUltimateConverter',
709+
'gatherImages: invalid title for link "' . $rawLink . '"' );
669710
continue;
670711
}
671712

0 commit comments

Comments
 (0)