@@ -287,6 +287,7 @@ public function init(): bool
287
287
288
288
if (null === $ articleContent ) {
289
289
$ this ->success = false ;
290
+ /** @var JSLikeHTMLElement */
290
291
$ articleContent = $ this ->dom ->createElement ('div ' );
291
292
$ articleContent ->setAttribute ('class ' , 'readability-content ' );
292
293
$ articleContent ->setInnerHtml ('<p>Sorry, Readability was unable to parse this page for content.</p> ' );
@@ -302,7 +303,9 @@ public function init(): bool
302
303
303
304
// without tidy the body can (sometimes) be wiped, so re-create it
304
305
if (false === isset ($ this ->body ->childNodes )) {
305
- $ this ->body = $ this ->dom ->createElement ('body ' );
306
+ /** @var JSLikeHTMLElement */
307
+ $ body = $ this ->dom ->createElement ('body ' );
308
+ $ this ->body = $ body ;
306
309
}
307
310
308
311
// Clear the old HTML, insert the new content.
@@ -335,19 +338,23 @@ public function postProcessContent(\DOMElement $articleContent): void
335
338
*/
336
339
public function addFootnotes (\DOMElement $ articleContent ): void
337
340
{
341
+ /** @var JSLikeHTMLElement */
338
342
$ footnotesWrapper = $ this ->dom ->createElement ('footer ' );
339
343
$ footnotesWrapper ->setAttribute ('class ' , 'readability-footnotes ' );
340
344
$ footnotesWrapper ->setInnerHtml ('<h3>References</h3> ' );
341
345
$ articleFootnotes = $ this ->dom ->createElement ('ol ' );
342
346
$ articleFootnotes ->setAttribute ('class ' , 'readability-footnotes-list ' );
343
347
$ footnotesWrapper ->appendChild ($ articleFootnotes );
348
+ /** @var \DOMNodeList<JSLikeHTMLElement> */
344
349
$ articleLinks = $ articleContent ->getElementsByTagName ('a ' );
345
350
$ linkCount = 0 ;
346
351
347
352
for ($ i = 0 ; $ i < $ articleLinks ->length ; ++$ i ) {
348
353
$ articleLink = $ articleLinks ->item ($ i );
349
354
$ footnoteLink = $ articleLink ->cloneNode (true );
355
+ /** @var JSLikeHTMLElement */
350
356
$ refLink = $ this ->dom ->createElement ('a ' );
357
+ /** @var JSLikeHTMLElement */
351
358
$ footnote = $ this ->dom ->createElement ('li ' );
352
359
$ linkDomain = @parse_url ($ footnoteLink ->getAttribute ('href ' ), \PHP_URL_HOST );
353
360
if (!$ linkDomain && isset ($ this ->url )) {
@@ -609,6 +616,7 @@ public function killBreaks(JSLikeHTMLElement $node): void
609
616
*/
610
617
public function clean (JSLikeHTMLElement $ e , string $ tag ): void
611
618
{
619
+ /** @var \DOMNodeList<JSLikeHTMLElement> */
612
620
$ targetList = $ e ->getElementsByTagName ($ tag );
613
621
$ isEmbed = ('audio ' === $ tag || 'video ' === $ tag || 'iframe ' === $ tag || 'object ' === $ tag || 'embed ' === $ tag );
614
622
@@ -645,6 +653,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
645
653
return ;
646
654
}
647
655
656
+ /** @var \DOMNodeList<JSLikeHTMLElement> */
648
657
$ tagsList = $ e ->getElementsByTagName ($ tag );
649
658
$ curTagsLength = $ tagsList ->length ;
650
659
@@ -755,6 +764,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
755
764
public function cleanHeaders (JSLikeHTMLElement $ e ): void
756
765
{
757
766
for ($ headerIndex = 1 ; $ headerIndex < 3 ; ++$ headerIndex ) {
767
+ /** @var \DOMNodeList<JSLikeHTMLElement> */
758
768
$ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
759
769
760
770
for ($ i = $ headers ->length - 1 ; $ i >= 0 ; --$ i ) {
@@ -823,6 +833,7 @@ protected function getArticleTitle(): JSLikeHTMLElement
823
833
$ curTitle = $ origTitle ;
824
834
}
825
835
836
+ /** @var JSLikeHTMLElement */
826
837
$ articleTitle = $ this ->dom ->createElement ('h1 ' );
827
838
$ articleTitle ->setInnerHtml ($ curTitle );
828
839
@@ -840,7 +851,9 @@ protected function prepDocument(): void
840
851
* so we create a new body node and append it to the document.
841
852
*/
842
853
if (null === $ this ->body ) {
843
- $ this ->body = $ this ->dom ->createElement ('body ' );
854
+ /** @var JSLikeHTMLElement */
855
+ $ body = $ this ->dom ->createElement ('body ' );
856
+ $ this ->body = $ body ;
844
857
$ this ->dom ->documentElement ->appendChild ($ this ->body );
845
858
}
846
859
@@ -944,6 +957,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
944
957
$ xpath = new \DOMXPath ($ page );
945
958
}
946
959
960
+ /** @var \DOMNodeList<JSLikeHTMLElement> */
947
961
$ allElements = $ page ->getElementsByTagName ('* ' );
948
962
949
963
for ($ nodeIndex = 0 ; $ allElements ->item ($ nodeIndex ); ++$ nodeIndex ) {
@@ -986,6 +1000,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
986
1000
// (as in, where they contain no other block level elements).
987
1001
if ('div ' === $ tagName ) {
988
1002
if (!preg_match ($ this ->regexps ['divToPElements ' ], $ nodeContent )) {
1003
+ /** @var JSLikeHTMLElement */
989
1004
$ newNode = $ this ->dom ->createElement ('p ' );
990
1005
991
1006
try {
@@ -1156,7 +1171,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1156
1171
}
1157
1172
}
1158
1173
1159
- /** @var \DOMNodeList <JSLikeHTMLElement> */
1174
+ /** @var non-empty-array <JSLikeHTMLElement|null > */
1160
1175
$ topCandidates = array_filter (
1161
1176
$ topCandidates ,
1162
1177
fn ($ v , $ idx ) => 0 === $ idx || null !== $ v ,
@@ -1169,18 +1184,21 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1169
1184
* We also have to copy the body node so it is something we can modify.
1170
1185
*/
1171
1186
if (null === $ topCandidate || 0 === strcasecmp ($ topCandidate ->tagName , 'body ' )) {
1187
+ /** @var JSLikeHTMLElement */
1172
1188
$ topCandidate = $ this ->dom ->createElement ('div ' );
1173
1189
1174
1190
if ($ page instanceof \DOMDocument) {
1175
- if (!isset ($ page ->documentElement )) {
1191
+ /** @var ?JSLikeHTMLElement */
1192
+ $ documentElement = $ page ->documentElement ;
1193
+ if (null === $ documentElement ) {
1176
1194
// we don't have a body either? what a mess! :)
1177
1195
$ this ->logger ->debug ('The page has no body! ' );
1178
1196
} else {
1179
1197
$ this ->logger ->debug ('Setting body to a raw HTML of original page! ' );
1180
- $ topCandidate ->setInnerHtml ($ page -> documentElement ->getInnerHTML ());
1181
- $ page -> documentElement ->setInnerHtml ('' );
1198
+ $ topCandidate ->setInnerHtml ($ documentElement ->getInnerHTML ());
1199
+ $ documentElement ->setInnerHtml ('' );
1182
1200
$ this ->reinitBody ();
1183
- $ page -> documentElement ->appendChild ($ topCandidate );
1201
+ $ documentElement ->appendChild ($ topCandidate );
1184
1202
}
1185
1203
} else {
1186
1204
$ topCandidate ->setInnerHtml ($ page ->getInnerHTML ());
@@ -1189,7 +1207,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1189
1207
}
1190
1208
1191
1209
$ this ->initializeNode ($ topCandidate );
1192
- } elseif ($ topCandidate ) {
1210
+ } elseif (null !== $ topCandidate ) {
1193
1211
$ alternativeCandidateAncestors = [];
1194
1212
foreach ($ topCandidates as $ candidate ) {
1195
1213
if ((int ) $ candidate ->getAttribute ('readability ' ) / (int ) $ topCandidate ->getAttribute ('readability ' ) >= 0.75 ) {
@@ -1200,7 +1218,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1200
1218
}
1201
1219
if (\count ($ alternativeCandidateAncestors ) >= 3 ) {
1202
1220
$ parentOfTopCandidate = $ topCandidate ->parentNode ;
1203
- while ('body ' !== $ parentOfTopCandidate ->nodeName ) {
1221
+ while ('body ' !== $ parentOfTopCandidate ->nodeName && $ parentOfTopCandidate instanceof JSLikeHTMLElement ) {
1204
1222
$ listsContainingThisAncestor = 0 ;
1205
1223
for ($ ancestorIndex = 0 ; $ ancestorIndex < \count ($ alternativeCandidateAncestors ) && $ listsContainingThisAncestor < 3 ; ++$ ancestorIndex ) {
1206
1224
$ listsContainingThisAncestor += (int ) \in_array ($ parentOfTopCandidate , $ alternativeCandidateAncestors [$ ancestorIndex ], true );
@@ -1264,6 +1282,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1264
1282
* Now that we have the top candidate, look through its siblings for content that might also be related.
1265
1283
* Things like preambles, content split by ads that we removed, etc.
1266
1284
*/
1285
+ /** @var JSLikeHTMLElement */
1267
1286
$ articleContent = $ this ->dom ->createElement ('div ' );
1268
1287
$ articleContent ->setAttribute ('class ' , 'readability-content ' );
1269
1288
$ siblingScoreThreshold = max (10 , ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 );
@@ -1311,6 +1330,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1311
1330
if (0 !== strcasecmp ($ siblingNodeName , 'div ' ) && 0 !== strcasecmp ($ siblingNodeName , 'p ' )) {
1312
1331
// We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
1313
1332
$ this ->logger ->debug ('Altering siblingNode " ' . $ siblingNodeName . '" to "div". ' );
1333
+ /** @var JSLikeHTMLElement */
1314
1334
$ nodeToAppend = $ this ->dom ->createElement ('div ' );
1315
1335
1316
1336
try {
@@ -1412,7 +1432,9 @@ protected function weightAttribute(JSLikeHTMLElement $element, string $attribute
1412
1432
protected function reinitBody (): void
1413
1433
{
1414
1434
if (!isset ($ this ->body ->childNodes )) {
1415
- $ this ->body = $ this ->dom ->createElement ('body ' );
1435
+ /** @var JSLikeHTMLElement */
1436
+ $ body = $ this ->dom ->createElement ('body ' );
1437
+ $ this ->body = $ body ;
1416
1438
$ this ->body ->setInnerHtml ($ this ->bodyCache );
1417
1439
}
1418
1440
}
@@ -1544,7 +1566,7 @@ private function isPhrasingContent($node): bool
1544
1566
private function getSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): ?JSLikeHTMLElement
1545
1567
{
1546
1568
$ childNodes = iterator_to_array ($ node ->childNodes );
1547
- $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof \DOMElement );
1569
+ $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof JSLikeHTMLElement );
1548
1570
1549
1571
// There should be exactly 1 element child with given tag
1550
1572
if (1 !== \count ($ children ) || $ children [0 ]->nodeName !== $ tag ) {
0 commit comments