@@ -987,9 +987,9 @@ protected function initializeNode(\DOMElement $node): void
987
987
* Using a variety of metrics (content score, classname, element types), find the content that is
988
988
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
989
989
*
990
- * @return \DOMElement |false
990
+ * @return JSLikeHTMLElement |false
991
991
*/
992
- protected function grabArticle (?\ DOMElement $ page = null )
992
+ protected function grabArticle (?JSLikeHTMLElement $ page = null )
993
993
{
994
994
if (!$ page ) {
995
995
$ page = $ this ->dom ;
@@ -1166,7 +1166,7 @@ protected function grabArticle(?\DOMElement $page = null)
1166
1166
for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
1167
1167
$ node = $ candidates ->item ($ c );
1168
1168
// node should be readable but not inside of an article otherwise it's probably non-readable block
1169
- if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof \DOMElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1169
+ if ($ node ->hasAttribute ('readability ' ) && (int ) $ node ->getAttributeNode ('readability ' )->value < 40 && ($ node ->parentNode instanceof JSLikeHTMLElement ? 0 !== strcasecmp ($ node ->parentNode ->tagName , 'article ' ) : true )) {
1170
1170
$ this ->logger ->debug ('Removing unlikely candidate (using note) ' . $ node ->getNodePath () . ' by " ' . $ node ->tagName . '" with readability ' . self ::getContentScore ($ node ));
1171
1171
$ node ->parentNode ->removeChild ($ node );
1172
1172
}
@@ -1302,7 +1302,7 @@ protected function grabArticle(?\DOMElement $page = null)
1302
1302
if (0 === strcasecmp ($ tagName , 'td ' ) || 0 === strcasecmp ($ tagName , 'tr ' )) {
1303
1303
$ up = $ topCandidate ;
1304
1304
1305
- if ($ up ->parentNode instanceof \DOMElement ) {
1305
+ if ($ up ->parentNode instanceof JSLikeHTMLElement ) {
1306
1306
$ up = $ up ->parentNode ;
1307
1307
1308
1308
if (0 === strcasecmp ($ up ->tagName , 'table ' )) {
@@ -1327,19 +1327,19 @@ protected function grabArticle(?\DOMElement $page = null)
1327
1327
$ siblingNode = $ siblingNodes ->item ($ s );
1328
1328
$ siblingNodeName = $ siblingNode ->nodeName ;
1329
1329
$ append = false ;
1330
- $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1330
+ $ this ->logger ->debug ('Looking at sibling node: ' . $ siblingNode ->getNodePath () . (($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->hasAttribute ('readability ' )) ? (' with score ' . $ siblingNode ->getAttribute ('readability ' )) : '' ));
1331
1331
1332
1332
if ($ siblingNode ->isSameNode ($ topCandidate )) {
1333
1333
$ append = true ;
1334
1334
} else {
1335
1335
$ contentBonus = 0 ;
1336
1336
1337
1337
// Give a bonus if sibling nodes and top candidates have the same classname.
1338
- if ($ siblingNode instanceof \DOMElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1338
+ if ($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->getAttribute ('class ' ) === $ topCandidate ->getAttribute ('class ' ) && '' !== $ topCandidate ->getAttribute ('class ' )) {
1339
1339
$ contentBonus += ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 ;
1340
1340
}
1341
1341
1342
- if ($ siblingNode instanceof \DOMElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1342
+ if ($ siblingNode instanceof JSLikeHTMLElement && $ siblingNode ->hasAttribute ('readability ' ) && (((int ) $ siblingNode ->getAttribute ('readability ' )) + $ contentBonus ) >= $ siblingScoreThreshold ) {
1343
1343
$ append = true ;
1344
1344
} elseif (0 === strcasecmp ($ siblingNodeName , 'p ' )) {
1345
1345
$ linkDensity = (int ) $ this ->getLinkDensity ($ siblingNode );
@@ -1426,7 +1426,7 @@ protected function grabArticle(?\DOMElement $page = null)
1426
1426
* Get an element weight by attribute.
1427
1427
* Uses regular expressions to tell if this element looks good or bad.
1428
1428
*/
1429
- protected function weightAttribute (\ DOMElement $ element , string $ attribute ): int
1429
+ protected function weightAttribute (JSLikeHTMLElement $ element , string $ attribute ): int
1430
1430
{
1431
1431
if (!$ element ->hasAttribute ($ attribute )) {
1432
1432
return 0 ;
@@ -1470,7 +1470,7 @@ protected function reinitBody(): void
1470
1470
*
1471
1471
* @param callable(float): float $f
1472
1472
*/
1473
- private static function updateContentScore (\ DOMElement $ element , callable $ f ): void
1473
+ private static function updateContentScore (JSLikeHTMLElement $ element , callable $ f ): void
1474
1474
{
1475
1475
$ readabilityAttr = $ element ->getAttributeNode ('readability ' );
1476
1476
$ prevScore = (float ) $ readabilityAttr ->value ;
@@ -1480,7 +1480,7 @@ private static function updateContentScore(\DOMElement $element, callable $f): v
1480
1480
/**
1481
1481
* Gets the content score for given element.
1482
1482
*/
1483
- private static function getContentScore (\ DOMElement $ element ): float
1483
+ private static function getContentScore (JSLikeHTMLElement $ element ): float
1484
1484
{
1485
1485
return $ element ->hasAttribute ('readability ' ) ? (float ) $ element ->getAttribute ('readability ' ) : 0 ;
1486
1486
}
@@ -1552,11 +1552,11 @@ private function loadHtml(): void
1552
1552
$ this ->dom ->registerNodeClass (\DOMElement::class, JSLikeHTMLElement::class);
1553
1553
}
1554
1554
1555
- private function getAncestors (\ DOMElement $ node , int $ maxDepth = 0 ): array
1555
+ private function getAncestors (JSLikeHTMLElement $ node , int $ maxDepth = 0 ): array
1556
1556
{
1557
1557
$ ancestors = [];
1558
1558
$ i = 0 ;
1559
- while ($ node ->parentNode instanceof \DOMElement ) {
1559
+ while ($ node ->parentNode instanceof JSLikeHTMLElement ) {
1560
1560
$ ancestors [] = $ node ->parentNode ;
1561
1561
if (++$ i === $ maxDepth ) {
1562
1562
break ;
@@ -1589,10 +1589,10 @@ private function isPhrasingContent($node): bool
1589
1589
* Returns false if `$node` contains non-empty text nodes
1590
1590
* or if it contains no element with given tag or more than 1 element.
1591
1591
*/
1592
- private function hasSingleTagInsideElement (\ DOMElement $ node , string $ tag ): bool
1592
+ private function hasSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): bool
1593
1593
{
1594
1594
$ childNodes = iterator_to_array ($ node ->childNodes );
1595
- $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof \DOMElement );
1595
+ $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof JSLikeHTMLElement );
1596
1596
1597
1597
// There should be exactly 1 element child with given tag
1598
1598
if (1 !== \count ($ children ) || $ children [0 ]->nodeName !== $ tag ) {
@@ -1613,7 +1613,7 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
1613
1613
* Tidy must be configured to not clean the input for this function to
1614
1614
* work as expected, see $this->tidy_config['clean']
1615
1615
*/
1616
- private function isNodeVisible (\ DOMElement $ node ): bool
1616
+ private function isNodeVisible (JSLikeHTMLElement $ node ): bool
1617
1617
{
1618
1618
return !(
1619
1619
$ node ->hasAttribute ('style ' )
0 commit comments