Skip to content

Commit 7f195cd

Browse files
committed
Merge branch 'master' of github.com:hexydec/htmldoc
2 parents 552c44d + d9c4137 commit 7f195cd

File tree

6 files changed

+76
-42
lines changed

6 files changed

+76
-42
lines changed

readme.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ $ vendor/bin/phpunit
9595

9696
## Support
9797

98-
HTMLdoc supports PHP version 7.4+.
98+
HTMLdoc supports PHP version 8.0+.
9999

100100
## Contributing
101101

src/helpers/selector.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ public function parse(tokenise $tokens) {
109109
case 'bracketclose':
110110
$selectors[] = $parts;
111111
$parts = [];
112-
break;
112+
break 2;
113113
}
114114
} while (($token = $tokens->next()) !== null);
115115
if ($parts) {

src/htmldoc.php

+29-25
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ class htmldoc extends config implements \ArrayAccess, \Iterator {
6767
* @param string $var The name of the property to retrieve, currently 'length' and output
6868
* @return mixed The number of children in the object for length, the output config, or null if the parameter doesn't exist
6969
*/
70-
#[\ReturnTypeWillChange]
71-
public function __get(string $var) {
70+
public function __get(string $var) : mixed {
7271
if ($var === 'config') {
7372
return $this->config;
7473
} elseif ($var === 'length') {
@@ -92,7 +91,7 @@ public function toArray() : array {
9291
* @param mixed $i The key to be updated, can be a string or integer
9392
* @param mixed $value The value of the array key in the children array to be updated
9493
*/
95-
public function offsetSet($i, $value) : void {
94+
public function offsetSet(mixed $i, mixed $value) : void {
9695
$this->children[$i] = $value;
9796
}
9897

@@ -102,7 +101,7 @@ public function offsetSet($i, $value) : void {
102101
* @param mixed $i The key to be checked
103102
* @return bool Whether the key exists in the children array
104103
*/
105-
public function offsetExists($i) : bool {
104+
public function offsetExists(mixed $i) : bool {
106105
return isset($this->children[$i]);
107106
}
108107

@@ -111,7 +110,7 @@ public function offsetExists($i) : bool {
111110
*
112111
* @param mixed $i The key to be removed
113112
*/
114-
public function offsetUnset($i) : void {
113+
public function offsetUnset(mixed $i) : void {
115114
unset($this->children[$i]);
116115
}
117116

@@ -121,8 +120,7 @@ public function offsetUnset($i) : void {
121120
* @param mixed $i The key to be accessed, can be a string or integer
122121
* @return mixed An HTMLdoc object containing the child node at the requested position or null if there is no child at the requested position
123122
*/
124-
#[\ReturnTypeWillChange]
125-
public function offsetGet($i) { // return reference so you can set it like an array
123+
public function offsetGet(mixed $i) : mixed { // return reference so you can set it like an array
126124
if (isset($this->children[$i])) {
127125
$obj = new htmldoc($this->config);
128126
$obj->collection([$this->children[$i]]);
@@ -136,8 +134,7 @@ public function offsetGet($i) { // return reference so you can set it like an ar
136134
*
137135
* @return mixed An HTMLdoc object containing the child node at the current pointer position or null if there are no children
138136
*/
139-
#[\ReturnTypeWillChange]
140-
public function current() {
137+
public function current() : mixed {
141138
if (isset($this->children[$this->pointer])) {
142139
$obj = new htmldoc($this->config);
143140
$obj->collection([$this->children[$this->pointer]]);
@@ -151,8 +148,7 @@ public function current() {
151148
*
152149
* @return mixed The current pointer position
153150
*/
154-
#[\ReturnTypeWillChange]
155-
public function key() {
151+
public function key() : mixed {
156152
return $this->pointer;
157153
}
158154

@@ -191,7 +187,7 @@ public function valid() : bool {
191187
* @param ?string &$error A reference to any user error that is generated
192188
* @return string|false The loaded HTML, or false on error
193189
*/
194-
public function open(string $url, $context = null, ?string &$error = null) {
190+
public function open(string $url, $context = null, ?string &$error = null) : string|false {
195191

196192
// check resource
197193
if ($context !== null && !\is_resource($context)) {
@@ -295,9 +291,9 @@ protected function isEncodingValid(string $charset) : bool {
295291
* Parses an array of tokens into an HTML document
296292
*
297293
* @param string|htmldoc $html A string of HTML, or an htmldoc object
298-
* @return bool|array An array of node objects or false on error
294+
* @return array|false An array of node objects or false on error
299295
*/
300-
protected function parse($html) {
296+
protected function parse(string|htmldoc $html) : array|false {
301297

302298
// convert string to nodes
303299
if (\is_string($html)) {
@@ -346,9 +342,9 @@ public function cache(string $key, array $values) : void {
346342
* Retrieves the tag object at the specified index, or all children of type tag
347343
*
348344
* @param int $index The index of the child tag to retrieve
349-
* @return mixed A tag object if index is specified, or an array of tag objects, or null if the specified index doesn't exist or the object is empty
345+
* @return tag|array|null A tag object if index is specified, or an array of tag objects, or null if the specified index doesn't exist or the object is empty
350346
*/
351-
public function get(int $index = null) {
347+
public function get(int $index = null) : tag|array|null {
352348

353349
// build children that are tags
354350
$children = [];
@@ -494,7 +490,15 @@ public function text() : string {
494490
* @return void
495491
*/
496492
protected function collection(array $nodes) : void {
497-
$this->children = $nodes;
493+
494+
// only store unique nodes as some find operations can produce the same node multiple times
495+
$unique = [];
496+
foreach ($nodes AS $item) {
497+
if (!\in_array($item, $unique, true)) {
498+
$unique[] = $item;
499+
}
500+
}
501+
$this->children = $unique;
498502
}
499503

500504
/**
@@ -599,7 +603,7 @@ public function html(array $options = []) : string {
599603
* @param string|htmldoc $html A string of HTML, or an htmldoc object
600604
* @return htmldoc The current htmldoc object with the nodes appended
601605
*/
602-
public function append($html) : htmldoc {
606+
public function append(string|htmldoc $html) : htmldoc {
603607
if (($nodes = $this->parse($html)) !== false) {
604608
foreach ($this->children AS $item) {
605609
if (\get_class($item) === 'hexydec\\html\\tag') {
@@ -616,7 +620,7 @@ public function append($html) : htmldoc {
616620
* @param string|htmldoc $html A string of HTML, or an htmldoc object
617621
* @return htmldoc The current htmldoc object with the nodes prepended
618622
*/
619-
public function prepend($html) : htmldoc {
623+
public function prepend(string|htmldoc $html) : htmldoc {
620624
if (($nodes = $this->parse($html)) !== false) {
621625
foreach ($this->children AS $item) {
622626
if (\get_class($item) === 'hexydec\\html\\tag') {
@@ -633,7 +637,7 @@ public function prepend($html) : htmldoc {
633637
* @param string|htmldoc $html A string of HTML, or an htmldoc object
634638
* @return htmldoc The current htmldoc object with the nodes inserted before each matched node
635639
*/
636-
public function before($html) : htmldoc {
640+
public function before(string|htmldoc $html) : htmldoc {
637641
if (($nodes = $this->parse($html)) !== false) {
638642
foreach ($this->children AS $item) {
639643
if (\get_class($item) === 'hexydec\\html\\tag') {
@@ -650,7 +654,7 @@ public function before($html) : htmldoc {
650654
* @param string|htmldoc $html A string of HTML, or an htmldoc object
651655
* @return htmldoc The current htmldoc object with the nodes inserted after each matched node
652656
*/
653-
public function after($html) : htmldoc {
657+
public function after(string|htmldoc $html) : htmldoc {
654658
if (($nodes = $this->parse($html)) !== false) {
655659
foreach ($this->children AS $item) {
656660
if (\get_class($item) === 'hexydec\\html\\tag') {
@@ -664,10 +668,10 @@ public function after($html) : htmldoc {
664668
/**
665669
* Removes all top level nodes, or if $selector is specified, the nodes matched by the selector
666670
*
667-
* @param string $selector A CSS selector to refine the nodes to delete or null to delete top level nodes
671+
* @param ?string $selector A CSS selector to refine the nodes to delete or null to delete top level nodes
668672
* @return htmldoc The current htmldoc object with the requested nodes deleted
669673
*/
670-
public function remove(string $selector = null) : htmldoc {
674+
public function remove(?string $selector = null) : htmldoc {
671675
$obj = $selector ? $this->find($selector) : $this;
672676
foreach ($obj->children AS $item) {
673677
if (\get_class($item) === 'hexydec\\html\\tag') {
@@ -682,9 +686,9 @@ public function remove(string $selector = null) : htmldoc {
682686
*
683687
* @param string|null $file The file location to save the document to, or null to just return the compiled code
684688
* @param array $options An array indicating output options, this is merged with htmldoc::$output
685-
* @return string|bool The compiled HTML, or false if the file could not be saved
689+
* @return string|false The compiled HTML, or false if the file could not be saved
686690
*/
687-
public function save(string $file = null, array $options = []) {
691+
public function save(?string $file = null, array $options = []) : string|false {
688692

689693
// compile html
690694
$html = $this->html($options);

src/tokens/tag.php

+23-6
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public function __set(string $name, $value) : void {
9494
*
9595
* @return void
9696
*/
97-
public function __clone() {
97+
public function __clone() : void {
9898
foreach ($this->children AS &$item) {
9999
$item = clone $item;
100100
}
@@ -287,7 +287,7 @@ public function parseChildren(tokenise $tokens) : array {
287287
/**
288288
* Returns the parent of the current object
289289
*
290-
* @return tag The parent tag
290+
* @return ?tag The parent tag
291291
*/
292292
public function parent() : ?tag {
293293
return $this->parent;
@@ -641,11 +641,29 @@ public function find(array $selector, bool $searchChildren = true) : array {
641641
}
642642

643643
// pass rest of selector to level below
644-
if ($item['join'] && $i) {
644+
if (\in_array($item['join'], [' ', '>'], true) && $i) {
645645
$match = false;
646+
$childselector = \array_slice($selector, $i);
646647
foreach ($this->children AS $child) {
647648
if (\get_class($child) === 'hexydec\\html\\tag') {
648-
$found = \array_merge($found, $child->find(\array_slice($selector, $i)));
649+
$found = \array_merge($found, $child->find($childselector));
650+
}
651+
}
652+
break;
653+
654+
// find siblings
655+
} elseif (\in_array($item['join'], ['+', '~'], true) && $i) {
656+
$match = false;
657+
$siblingselector = \array_slice($selector, $i);
658+
$search = false;
659+
foreach ($this->parent->children AS $sibling) {
660+
if (!$search && $sibling === $this) {
661+
$search = true;
662+
} elseif ($search && \get_class($sibling) === 'hexydec\\html\\tag') {
663+
$found = \array_merge($found, $sibling->find($siblingselector));
664+
if ($item['join'] === '+') {
665+
break;
666+
}
649667
}
650668
}
651669
break;
@@ -917,8 +935,7 @@ public function children() : array {
917935
*
918936
* @return mixed The value of the requested property
919937
*/
920-
#[\ReturnTypeWillChange]
921-
public function __get(string $var) {
938+
public function __get(string $var) : mixed {
922939
return $this->$var;
923940
}
924941
}

src/tokens/text.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public function __construct(htmldoc $root, ?tag $parent = null) {
3838
* @param mixed $value The value of the property to set
3939
* @return void
4040
*/
41-
public function __set(string $name, $value) : void {
41+
public function __set(string $name, mixed $value) : void {
4242
if ($name === 'parent' && \get_class($value) === 'hexydec\\html\\tag') {
4343
$this->parent = $value;
4444
}
@@ -117,7 +117,7 @@ public function minify(array $minify) : void {
117117
}
118118
}
119119

120-
protected function getIndex($children) {
120+
protected function getIndex(array $children) : int|false {
121121
foreach ($children AS $key => $value) {
122122
if ($value === $this) {
123123
return $key;

tests/findHtmldocTest.php

+20-7
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,23 @@ public function testCanFindElements() {
1919
$this->assertEquals($doc->length, 4, 'Can count elements');
2020
// var_dump($doc->find('title'));
2121
$tests = [
22+
23+
// basic selectors
2224
'title' => '<title>Find</title>',
2325
'.find' => '<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>',
2426
'#first' => '<div id="first" class="first">First</div>',
27+
'.first, .find__heading, .find__paragraph' => '<div id="first" class="first">First</div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
28+
29+
// combination selectors
30+
'body .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
31+
'body > .find__paragraph' => null,
32+
'.find > .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
33+
'.find__paragraph + a' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
34+
'div[data-attr] ~ div' => '<div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div><div data-word="one two three four">attr</div>',
35+
'.find h1 ~ a' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
36+
'.attributes div ~ div' => '<div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div><div data-word="one two three four">attr</div>',
37+
38+
// attribute selectors
2539
'#first[class]' => '<div id="first" class="first">First</div>',
2640
'[class=first]' => '<div id="first" class="first">First</div>',
2741
'[class^=find]' => '<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
@@ -34,17 +48,16 @@ public function testCanFindElements() {
3448
'a[href$="://github.com/hexydec/htmldoc"]' => null,
3549
'a[href$="://github.com/Hexydec/Htmldoc/"]' => null,
3650
'a[href$="://github.com/Hexydec/Htmldoc/" i]' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
51+
'[data-attr]' => '<div data-attr>attr</div><div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
52+
'[data-attr|=attr]' => '<div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
53+
'[data-word~=three]' => '<div data-word="one two three four">attr</div>',
54+
55+
// pseudo selectors
3756
'.positions div:first-child' => '<div id="first" class="first">First</div>',
3857
'.positions div:last-child' => '<div class="last">Last</div>',
39-
'.first, .find__heading, .find__paragraph' => '<div id="first" class="first">First</div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
40-
'body .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
41-
'body > .find__paragraph' => null,
42-
'.find > .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
4358
'title:not([class])' => '<title>Find</title>',
4459
'.positions div:not(.find)' => '<div id="first" class="first">First</div><div class="last">Last</div>',
45-
'[data-attr]' => '<div data-attr>attr</div><div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
46-
'[data-attr|=attr]' => '<div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
47-
'[data-word~=three]' => '<div data-word="one two three four">attr</div>'
60+
'body section:not(:first-child) div:last-child' => '<div data-word="one two three four">attr</div>',
4861
];
4962
foreach ($tests AS $key => $item) {
5063
$this->assertEquals($item, $doc->find($key)->html());

0 commit comments

Comments
 (0)