@@ -474,6 +474,7 @@ public function prepArticle(\DOMNode $articleContent): void
474
474
}
475
475
476
476
// Remove service data-candidate attribute.
477
+ /** @var \DOMNodeList<\DOMElement> */
477
478
$ elems = $ xpath ->query ('.//*[@data-candidate] ' , $ articleContent );
478
479
foreach ($ elems as $ elem ) {
479
480
$ elem ->removeAttribute ('data-candidate ' );
@@ -1159,12 +1160,13 @@ protected function grabArticle(?\DOMElement $page = null)
1159
1160
* This is faster to do before scoring but safer after.
1160
1161
*/
1161
1162
if ($ this ->flagIsActive (self ::FLAG_STRIP_UNLIKELYS ) && $ xpath ) {
1163
+ /** @var \DOMNodeList<\DOMElement> */
1162
1164
$ candidates = $ xpath ->query ('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)] ' , $ page ->documentElement );
1163
1165
1164
1166
for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
1165
1167
$ node = $ candidates ->item ($ c );
1166
1168
// node should be readable but not inside of an article otherwise it's probably non-readable block
1167
- if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1169
+ if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof \DOMElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1168
1170
$ this ->logger ->debug ('Removing unlikely candidate (using note) ' . $ node ->getNodePath () . ' by " ' . $ node ->tagName . '" with readability ' . self ::getContentScore ($ node ));
1169
1171
$ node ->parentNode ->removeChild ($ node );
1170
1172
}
@@ -1180,6 +1182,7 @@ protected function grabArticle(?\DOMElement $page = null)
1180
1182
$ topCandidates = array_fill (0 , 5 , null );
1181
1183
if ($ xpath ) {
1182
1184
// Using array of DOMElements after deletion is a path to DOOMElement.
1185
+ /** @var \DOMNodeList<\DOMElement> */
1183
1186
$ candidates = $ xpath ->query ('.//*[@data-candidate] ' , $ page ->documentElement );
1184
1187
$ this ->logger ->debug ('Candidates: ' . $ candidates ->length );
1185
1188
@@ -1206,6 +1209,7 @@ protected function grabArticle(?\DOMElement $page = null)
1206
1209
}
1207
1210
}
1208
1211
1212
+ /** @var \DOMNodeList<\DOMElement> */
1209
1213
$ topCandidates = array_filter (
1210
1214
$ topCandidates ,
1211
1215
fn ($ v , $ idx ) => 0 === $ idx || null !== $ v ,
@@ -1323,19 +1327,19 @@ protected function grabArticle(?\DOMElement $page = null)
1323
1327
$ siblingNode = $ siblingNodes ->item ($ s );
1324
1328
$ siblingNodeName = $ siblingNode ->nodeName ;
1325
1329
$ append = false ;
1326
- $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . ((\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1330
+ $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1327
1331
1328
1332
if ($ siblingNode ->isSameNode ($ topCandidate )) {
1329
1333
$ append = true ;
1330
1334
} else {
1331
1335
$ contentBonus = 0 ;
1332
1336
1333
1337
// Give a bonus if sibling nodes and top candidates have the same classname.
1334
- if (\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1338
+ if ($ siblingNode instanceof \DOMElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1335
1339
$ contentBonus += ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 ;
1336
1340
}
1337
1341
1338
- if (\ XML_ELEMENT_NODE === $ siblingNode-> nodeType && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1342
+ if ($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1339
1343
$ append = true ;
1340
1344
} elseif (0 === strcasecmp ($ siblingNodeName , 'p ' )) {
1341
1345
$ linkDensity = (int ) $ this ->getLinkDensity ($ siblingNode );
@@ -1565,7 +1569,7 @@ private function getAncestors(\DOMElement $node, int $maxDepth = 0): array
1565
1569
1566
1570
private function isPhrasingContent ($ node ): bool
1567
1571
{
1568
- return \ XML_TEXT_NODE === $ node-> nodeType
1572
+ return $ node instanceof \DOMText
1569
1573
|| \in_array (strtoupper ($ node ->nodeName ), $ this ->phrasingElements , true )
1570
1574
|| (
1571
1575
\in_array (strtoupper ($ node ->nodeName ), ['A ' , 'DEL ' , 'INS ' ], true )
0 commit comments