Skip to content

Commit 8cbb8a8

Browse files
committed
fixup! JSHtml
1 parent: ccdd166 · commit: 8cbb8a8

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

src/Readability.php

Lines changed: 33 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,7 @@ public function init(): bool
287287

288288
if (null === $articleContent) {
289289
$this->success = false;
290+
/** @var JSLikeHTMLElement */
290291
$articleContent = $this->dom->createElement('div');
291292
$articleContent->setAttribute('class', 'readability-content');
292293
$articleContent->setInnerHtml('<p>Sorry, Readability was unable to parse this page for content.</p>');
@@ -302,7 +303,9 @@ public function init(): bool
302303

303304
// without tidy the body can (sometimes) be wiped, so re-create it
304305
if (false === isset($this->body->childNodes)) {
305-
$this->body = $this->dom->createElement('body');
306+
/** @var JSLikeHTMLElement */
307+
$body = $this->dom->createElement('body');
308+
$this->body = $body;
306309
}
307310

308311
// Clear the old HTML, insert the new content.
@@ -335,19 +338,23 @@ public function postProcessContent(\DOMElement $articleContent): void
335338
*/
336339
public function addFootnotes(\DOMElement $articleContent): void
337340
{
341+
/** @var JSLikeHTMLElement */
338342
$footnotesWrapper = $this->dom->createElement('footer');
339343
$footnotesWrapper->setAttribute('class', 'readability-footnotes');
340344
$footnotesWrapper->setInnerHtml('<h3>References</h3>');
341345
$articleFootnotes = $this->dom->createElement('ol');
342346
$articleFootnotes->setAttribute('class', 'readability-footnotes-list');
343347
$footnotesWrapper->appendChild($articleFootnotes);
348+
/** @var \DOMNodeList<JSLikeHTMLElement> */
344349
$articleLinks = $articleContent->getElementsByTagName('a');
345350
$linkCount = 0;
346351

347352
for ($i = 0; $i < $articleLinks->length; ++$i) {
348353
$articleLink = $articleLinks->item($i);
349354
$footnoteLink = $articleLink->cloneNode(true);
355+
/** @var JSLikeHTMLElement */
350356
$refLink = $this->dom->createElement('a');
357+
/** @var JSLikeHTMLElement */
351358
$footnote = $this->dom->createElement('li');
352359
$linkDomain = @parse_url($footnoteLink->getAttribute('href'), \PHP_URL_HOST);
353360
if (!$linkDomain && isset($this->url)) {
@@ -609,6 +616,7 @@ public function killBreaks(JSLikeHTMLElement $node): void
609616
*/
610617
public function clean(JSLikeHTMLElement $e, string $tag): void
611618
{
619+
/** @var \DOMNodeList<JSLikeHTMLElement> */
612620
$targetList = $e->getElementsByTagName($tag);
613621
$isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag);
614622

@@ -645,6 +653,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
645653
return;
646654
}
647655

656+
/** @var \DOMNodeList<JSLikeHTMLElement> */
648657
$tagsList = $e->getElementsByTagName($tag);
649658
$curTagsLength = $tagsList->length;
650659

@@ -755,6 +764,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
755764
public function cleanHeaders(JSLikeHTMLElement $e): void
756765
{
757766
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
767+
/** @var \DOMNodeList<JSLikeHTMLElement> */
758768
$headers = $e->getElementsByTagName('h' . $headerIndex);
759769

760770
for ($i = $headers->length - 1; $i >= 0; --$i) {
@@ -823,6 +833,7 @@ protected function getArticleTitle(): JSLikeHTMLElement
823833
$curTitle = $origTitle;
824834
}
825835

836+
/** @var JSLikeHTMLElement */
826837
$articleTitle = $this->dom->createElement('h1');
827838
$articleTitle->setInnerHtml($curTitle);
828839

@@ -840,7 +851,9 @@ protected function prepDocument(): void
840851
* so we create a new body node and append it to the document.
841852
*/
842853
if (null === $this->body) {
843-
$this->body = $this->dom->createElement('body');
854+
/** @var JSLikeHTMLElement */
855+
$body = $this->dom->createElement('body');
856+
$this->body = $body;
844857
$this->dom->documentElement->appendChild($this->body);
845858
}
846859

@@ -944,6 +957,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
944957
$xpath = new \DOMXPath($page);
945958
}
946959

960+
/** @var \DOMNodeList<JSLikeHTMLElement> */
947961
$allElements = $page->getElementsByTagName('*');
948962

949963
for ($nodeIndex = 0; $allElements->item($nodeIndex); ++$nodeIndex) {
@@ -986,6 +1000,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
9861000
// (as in, where they contain no other block level elements).
9871001
if ('div' === $tagName) {
9881002
if (!preg_match($this->regexps['divToPElements'], $nodeContent)) {
1003+
/** @var JSLikeHTMLElement */
9891004
$newNode = $this->dom->createElement('p');
9901005

9911006
try {
@@ -1156,7 +1171,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11561171
}
11571172
}
11581173

1159-
/** @var \DOMNodeList<JSLikeHTMLElement> */
1174+
/** @var non-empty-array<JSLikeHTMLElement|null> */
11601175
$topCandidates = array_filter(
11611176
$topCandidates,
11621177
fn ($v, $idx) => 0 === $idx || null !== $v,
@@ -1169,18 +1184,21 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11691184
* We also have to copy the body node so it is something we can modify.
11701185
*/
11711186
if (null === $topCandidate || 0 === strcasecmp($topCandidate->tagName, 'body')) {
1187+
/** @var JSLikeHTMLElement */
11721188
$topCandidate = $this->dom->createElement('div');
11731189

11741190
if ($page instanceof \DOMDocument) {
1175-
if (!isset($page->documentElement)) {
1191+
/** @var ?JSLikeHTMLElement */
1192+
$documentElement = $page->documentElement;
1193+
if (null === $documentElement) {
11761194
// we don't have a body either? what a mess! :)
11771195
$this->logger->debug('The page has no body!');
11781196
} else {
11791197
$this->logger->debug('Setting body to a raw HTML of original page!');
1180-
$topCandidate->setInnerHtml($page->documentElement->getInnerHTML());
1181-
$page->documentElement->setInnerHtml('');
1198+
$topCandidate->setInnerHtml($documentElement->getInnerHTML());
1199+
$documentElement->setInnerHtml('');
11821200
$this->reinitBody();
1183-
$page->documentElement->appendChild($topCandidate);
1201+
$documentElement->appendChild($topCandidate);
11841202
}
11851203
} else {
11861204
$topCandidate->setInnerHtml($page->getInnerHTML());
@@ -1189,7 +1207,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11891207
}
11901208

11911209
$this->initializeNode($topCandidate);
1192-
} elseif ($topCandidate) {
1210+
} elseif (null !== $topCandidate) {
11931211
$alternativeCandidateAncestors = [];
11941212
foreach ($topCandidates as $candidate) {
11951213
if ((int) $candidate->getAttribute('readability') / (int) $topCandidate->getAttribute('readability') >= 0.75) {
@@ -1200,7 +1218,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
12001218
}
12011219
if (\count($alternativeCandidateAncestors) >= 3) {
12021220
$parentOfTopCandidate = $topCandidate->parentNode;
1203-
while ('body' !== $parentOfTopCandidate->nodeName) {
1221+
while ('body' !== $parentOfTopCandidate->nodeName && $parentOfTopCandidate instanceof JSLikeHTMLElement) {
12041222
$listsContainingThisAncestor = 0;
12051223
for ($ancestorIndex = 0; $ancestorIndex < \count($alternativeCandidateAncestors) && $listsContainingThisAncestor < 3; ++$ancestorIndex) {
12061224
$listsContainingThisAncestor += (int) \in_array($parentOfTopCandidate, $alternativeCandidateAncestors[$ancestorIndex], true);
@@ -1264,6 +1282,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
12641282
* Now that we have the top candidate, look through its siblings for content that might also be related.
12651283
* Things like preambles, content split by ads that we removed, etc.
12661284
*/
1285+
/** @var JSLikeHTMLElement */
12671286
$articleContent = $this->dom->createElement('div');
12681287
$articleContent->setAttribute('class', 'readability-content');
12691288
$siblingScoreThreshold = max(10, ((int) $topCandidate->getAttribute('readability')) * 0.2);
@@ -1311,6 +1330,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
13111330
if (0 !== strcasecmp($siblingNodeName, 'div') && 0 !== strcasecmp($siblingNodeName, 'p')) {
13121331
// We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
13131332
$this->logger->debug('Altering siblingNode "' . $siblingNodeName . '" to "div".');
1333+
/** @var JSLikeHTMLElement */
13141334
$nodeToAppend = $this->dom->createElement('div');
13151335

13161336
try {
@@ -1412,7 +1432,9 @@ protected function weightAttribute(JSLikeHTMLElement $element, string $attribute
14121432
protected function reinitBody(): void
14131433
{
14141434
if (!isset($this->body->childNodes)) {
1415-
$this->body = $this->dom->createElement('body');
1435+
/** @var JSLikeHTMLElement */
1436+
$body = $this->dom->createElement('body');
1437+
$this->body = $body;
14161438
$this->body->setInnerHtml($this->bodyCache);
14171439
}
14181440
}
@@ -1544,7 +1566,7 @@ private function isPhrasingContent($node): bool
15441566
private function getSingleTagInsideElement(JSLikeHTMLElement $node, string $tag): ?JSLikeHTMLElement
15451567
{
15461568
$childNodes = iterator_to_array($node->childNodes);
1547-
$children = array_filter($childNodes, fn ($childNode) => $childNode instanceof \DOMElement);
1569+
$children = array_filter($childNodes, fn ($childNode) => $childNode instanceof JSLikeHTMLElement);
15481570

15491571
// There should be exactly 1 element child with given tag
15501572
if (1 !== \count($children) || $children[0]->nodeName !== $tag) {

0 commit comments

Comments
 (0)