-
-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathParsoidMediaWikiParser.php
More file actions
107 lines (91 loc) · 3.41 KB
/
ParsoidMediaWikiParser.php
File metadata and controls
107 lines (91 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php
namespace PortableInfobox\Parsoid;
use MediaWiki\Title\Title;
use PortableInfobox\Services\Parser\ExternalParser;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Utils\DOMCompat;
/**
 * ExternalParser implementation backed by a ParsoidExtensionAPI instance.
 *
 * Wikitext is converted through the Parsoid extension API; several methods of the
 * interface are deliberate no-ops here (see the individual method comments).
 */
class ParsoidMediaWikiParser implements ExternalParser {

	/**
	 * @param ParsoidExtensionAPI $api Parsoid extension API used to convert wikitext to DOM
	 */
	public function __construct(
		private readonly ParsoidExtensionAPI $api,
	) {
	}

	/**
	 * Convert a piece of wikitext to HTML using Parsoid.
	 *
	 * @param string|null $wikitext wikitext to parse
	 * @return string|null the outer HTML of the parsed result, or null when $wikitext is null
	 */
	public function parseRecursive( $wikitext ) {
		if ( $wikitext === null ) {
			return null;
		}

		$paramParsed = $this->api->wikitextToDOM(
			$wikitext,
			[
				// this differs from earlier as we need the frame to be able to grab the
				// params the user passed - parsoid handles this internally it appears
				'processInNewFrame' => false,
				'parseOpts' => [ 'context' => 'inline' ],
			],
			// NOTE(review): presumably the "start of line" flag of wikitextToDOM - confirm
			// against the ParsoidExtensionAPI documentation.
			true
		);
		'@phan-var Element $paramParsed';

		// we don't want Parsoid to wrap in a span or add a typeof here,
		// just interested in the content
		return DOMCompat::getOuterHTML( $paramParsed );
	}

	/**
	 * Intentionally does nothing and returns no value.
	 *
	 * @param $wikitext @phan-unused-param
	 */
	public function replaceVariables( $wikitext ) {
		// no-op - I think handled by ->wikiTextToDOM?
	}

	/**
	 * Currently a no-op that always returns an empty string.
	 *
	 * @param $title @phan-unused-param
	 * @param array $sizeParams @phan-unused-param
	 * @return string|null always '' at present
	 */
	public function addImage( $title, array $sizeParams ): ?string {
		// no-op at present. Used to extend the image tag with specific information for the PageImages extension.
		// that extension relies on Parser hooks and therefore is not safe to assume that will work indefinitely.
		// could potentially just do it for now whilst the hooks still exist, and maybe remove at a later date if
		// PageImages is not made Parsoid-compat.
		return '';
	}

	/**
	 * @return ParsoidExtensionAPI the API instance this parser was constructed with
	 */
	public function getParsoidExtensionApi(): ParsoidExtensionAPI {
		return $this->api;
	}

	/**
	 * Extract the gallery and return the filename -> captions. PortableInfobox currently does this
	 * a lot cleaner as it piggybacks on onAfterParserFetchFileAndTitle hook to set the images into the data bag.
	 * This hook is NOT available on Parsoid, and we have no other way to get the resultant class which PortableInfobox
	 * currently relies on. So we need to fake it as best we can and hope WMF comes up with something later down
	 * the line.
	 *
	 * Only the first <gallery>...</gallery> element found in the wikitext is inspected. Each non-empty
	 * line inside it is split on the first "|" into a file name and an optional caption.
	 *
	 * @param string $wikitext
	 * @return array[] list of [ 'label' => string caption, 'title' => Title ] entries, one per
	 *  gallery line whose file name resolves to a valid Title; empty when nothing was found
	 */
	public function extractGallery( string $wikitext ): array {
		// the legacy implementation returns an array where each element is an array of the caption
		// and the title object for that specific image. We don't have access to this by default,
		// since there is no concept of half parsing in Parsoid - we either ask Parsoid for the Parsed wt->html
		// or we work with the WT and grab what we need.

		// this is quicker than passing the wikitext to Parsoid and extracting
		// the images etc from it.
		if ( !preg_match( '/<gallery[^>]*>(.*?)<\/gallery>/s', $wikitext, $matches ) ) {
			return [];
		}

		// Emptiness is checked with === '' throughout: PHP treats the string "0" as falsy,
		// so a loose check would wrongly discard a gallery entry that is literally "0".
		$galleryContent = trim( $matches[1] );
		if ( $galleryContent === '' ) {
			return [];
		}

		$result = [];
		foreach ( explode( "\n", $galleryContent ) as $line ) {
			$line = trim( $line );
			if ( $line === '' ) {
				continue;
			}

			// "Filename.png|Some caption" - everything after the first pipe is the caption
			$parts = explode( '|', $line, 2 );
			$filename = trim( $parts[0] );
			$caption = trim( $parts[1] ?? '' );
			if ( $filename === '' ) {
				continue;
			}

			// NS_FILE is the default namespace, used when the name carries no namespace prefix
			$title = Title::newFromText( $filename, NS_FILE );
			if ( $title !== null ) {
				$result[] = [
					'label' => $caption,
					'title' => $title,
				];
			}
		}

		return $result;
	}
}