Skip to content

Commit 6505b9c

Browse files
committed
Moved all files up a level in the src folder, as there was no point in keeping them in a nested folder.
Moved the output configuration from its own property into the main config file. Fixed bugs in tag::parse() and tag::parseChildren() where missing closing tags were not handled correctly: previously the system only checked the immediate parent tag to decide whether a closing tag was missing, and did not handle a closing parent tag correctly. It now uses the entire parent tag chain to detect this, closing all open tags up to the one matched by the detected closing tag; closing tags that match no open tag are discarded. Added tests.
1 parent 34f294b commit 6505b9c

File tree

12 files changed

+116
-61
lines changed

12 files changed

+116
-61
lines changed

index.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
}
1111
});
1212

13-
$base = empty($_POST['base']) ? '' : $_POST['base'];
13+
$base = $_POST['base'] ?? null;
1414
$input = '';
1515
$output = '';
1616
$minify = Array();
@@ -262,7 +262,7 @@
262262
</div>
263263
<?php if ($output) { ?>
264264
<input type="hidden" name="base" value="<?= htmlspecialchars($base); ?>" />
265-
<iframe class="minify__preview" srcdoc="<?= htmlspecialchars(str_replace('</title>', '</title><base href="'.htmlspecialchars($base).'">', $output)); ?>"></iframe>
265+
<iframe class="minify__preview" srcdoc="<?= htmlspecialchars(preg_replace('/<head[^>]*>/i', '$0<base href="'.htmlspecialchars($base).'">', $output)); ?>"></iframe>
266266
<?php } ?>
267267
<div class="minify__options">
268268
<h3>Options</h3>

src/autoload.php

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
<?php
22
spl_autoload_register(function (string $class) : bool {
3-
$dir = __DIR__.'/htmldoc';
43
$classes = [
5-
'hexydec\\html\\htmldoc' => $dir.'/htmldoc.php',
6-
'hexydec\\html\\config' => $dir.'/config.php',
7-
'hexydec\\html\\tokenise' => $dir.'/tokenise.php',
8-
'hexydec\\html\\token' => $dir.'/tokens/interfaces/token.php',
9-
'hexydec\\html\\comment' => $dir.'/tokens/comment.php',
10-
'hexydec\\html\\doctype' => $dir.'/tokens/doctype.php',
11-
'hexydec\\html\\pre' => $dir.'/tokens/pre.php',
12-
'hexydec\\html\\script' => $dir.'/tokens/script.php',
13-
'hexydec\\html\\style' => $dir.'/tokens/style.php',
14-
'hexydec\\html\\tag' => $dir.'/tokens/tag.php',
15-
'hexydec\\html\\text' => $dir.'/tokens/text.php'
4+
'hexydec\\html\\htmldoc' => __DIR__.'/htmldoc.php',
5+
'hexydec\\html\\config' => __DIR__.'/config.php',
6+
'hexydec\\html\\tokenise' => __DIR__.'/tokenise.php',
7+
'hexydec\\html\\token' => __DIR__.'/tokens/interfaces/token.php',
8+
'hexydec\\html\\comment' => __DIR__.'/tokens/comment.php',
9+
'hexydec\\html\\doctype' => __DIR__.'/tokens/doctype.php',
10+
'hexydec\\html\\pre' => __DIR__.'/tokens/pre.php',
11+
'hexydec\\html\\script' => __DIR__.'/tokens/script.php',
12+
'hexydec\\html\\style' => __DIR__.'/tokens/style.php',
13+
'hexydec\\html\\tag' => __DIR__.'/tokens/tag.php',
14+
'hexydec\\html\\text' => __DIR__.'/tokens/text.php'
1615
];
1716
if (isset($classes[$class])) {
1817
return require($classes[$class]);
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,21 @@ public function __construct(array $config = []) {
137137
'email' => false, // sets the minification presets to email safe options
138138
'style' => [], // specify CSS minifier options
139139
'script' => [] // specify javascript minifier options
140+
],
141+
'output' => [
142+
'charset' => null, // set the output charset
143+
'quotestyle' => 'double', // double, single, minimal
144+
'singletonclose' => null, // string to close singleton tags, or false to leave as is
145+
'closetags' => false, // whether to force tags to have a closing tag (true) or follow tag::close
146+
'xml' => false, // sets the output presets to produce XML valid code
147+
'elements' => [ // output options for particular tags elements
148+
'svg' => [
149+
'xml' => true,
150+
'quotestyle' => 'double', // double, single, minimal
151+
'singletonclose' => '/>', // string to close singleton tags, or false to leave as is
152+
'closetags' => true, // whether to force tags to have a closing tag (true) or follow tag::close
153+
]
154+
]
140155
]
141156
], $config);
142157
}
Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,6 @@ class htmldoc extends config implements \ArrayAccess, \Iterator {
4141
'whitespace' => '\s++',
4242
];
4343

44-
/**
45-
* @var array Contains the output settings
46-
*/
47-
protected $output = [
48-
'charset' => null, // set the output charset
49-
'quotestyle' => 'double', // double, single, minimal
50-
'singletonclose' => null, // string to close singleton tags, or false to leave as is
51-
'closetags' => false, // whether to force tags to have a closing tag (true) or follow tag::close
52-
'xml' => false, // sets the output presets to produce XML valid code
53-
'elements' => [ // output options for particular tags elements
54-
'svg' => [
55-
'xml' => true,
56-
'quotestyle' => 'double', // double, single, minimal
57-
'singletonclose' => '/>', // string to close singleton tags, or false to leave as is
58-
'closetags' => true, // whether to force tags to have a closing tag (true) or follow tag::close
59-
]
60-
]
61-
];
62-
6344
/**
6445
* @var array $children Stores the regexp components keyed by their corresponding codename for tokenising CSS selectors
6546
*/
@@ -82,10 +63,10 @@ class htmldoc extends config implements \ArrayAccess, \Iterator {
8263
* @return mixed The number of children in the object for length, the output config, or null if the parameter doesn't exist
8364
*/
8465
public function __get(string $var) {
85-
if ($var == 'length') {
66+
if ($var == 'config') {
67+
return $this->config;
68+
} elseif ($var == 'length') {
8669
return count($this->children);
87-
} elseif ($var == 'output') {
88-
return $this->output;
8970
}
9071
return null;
9172
}
@@ -139,22 +120,47 @@ public function offsetGet($i) { // return reference so you can set it like an ar
139120
return $this->children[$i] ?? null;
140121
}
141122

123+
/**
124+
* Retrieve the document node in the current position
125+
*
126+
* @return tag|text|comment|doctype The child node at the current pointer position
127+
*/
142128
public function current() {
143129
return $this->children[$this->pointer] ?? null;
144130
}
145131

132+
/**
133+
* Retrieve the current pointer position for the object
134+
*
135+
* @return scalar The current pointer position
136+
*/
146137
public function key() : scalar {
147138
return $this->pointer;
148139
}
149140

141+
/**
142+
* Increments the pointer position
143+
*
144+
* @return void
145+
*/
150146
public function next() : void {
151147
$this->pointer++;
152148
}
153149

150+
/**
151+
* Resets the pointer position to the start
152+
*
153+
* @return void
154+
*/
154155
public function rewind() : void {
155156
$this->pointer = 0;
156157
}
157158

159+
/**
160+
* Determines whether there is a node at the current pointer position
161+
*
162+
* @return bool Whether there is a node at the current pointer position
163+
*/
158164
public function valid() : bool {
159165
return isset($this->children[$this->pointer]);
160166
}
@@ -547,7 +553,7 @@ public function minify(array $minify = []) : void {
547553

548554
// set minify output parameters
549555
if ($minify['quotes']) {
550-
$this->output['quotestyle'] = 'minimal';
556+
$this->config['output']['quotestyle'] = 'minimal';
551557
}
552558

553559
// email minification
@@ -597,7 +603,7 @@ public function minify(array $minify = []) : void {
597603
* @return string The compiled HTML
598604
*/
599605
public function html(array $options = []) : string {
600-
$options = $options ? array_merge($this->output, $options) : $this->output;
606+
$options = $options ? array_merge($this->config['output'], $options) : $this->config['output'];
601607

602608
// presets
603609
if (!empty($options['xml'])) {
Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,21 @@ class tag implements token {
1010
*/
1111
protected $root;
1212

13+
/**
14+
* @var array The object configuration
15+
*/
16+
protected $config = [];
17+
1318
/**
1419
* @var tag The parent tag object
1520
*/
1621
protected $parent = null;
1722

23+
/**
24+
* @var array Cache for the list of parent tags
25+
*/
26+
protected $parenttags = null;
27+
1828
/**
1929
* @var string The type of tag
2030
*/
@@ -51,6 +61,8 @@ public function __construct(htmldoc $root, string $tag = null, tag $parent = nul
5161
$this->root = $root;
5262
$this->tagName = $tag;
5363
$this->parent = $parent;
64+
$this->config = $this->root->config; // cache the config
65+
$this->close = !in_array($tag, $this->config['elements']['closeoptional']);
5466
}
5567

5668
/**
@@ -73,8 +85,7 @@ public function parse(tokenise $tokens) : void {
7385

7486
// if you end up here, you are parsing an unclosed tag
7587
case 'tagopenstart':
76-
$tag = trim($token['value'], '<');
77-
$this->close = false;
88+
$tag = ltrim($token['value'], '<');
7889
$tokens->prev();
7990
break 2;
8091

@@ -100,7 +111,7 @@ public function parse(tokenise $tokens) : void {
100111
break;
101112

102113
case 'tagopenend':
103-
if (!in_array($tag, $this->root->getConfig('elements', 'singleton'))) {
114+
if (!in_array($tag, $this->config['elements']['singleton'])) {
104115
$this->children = $this->parseChildren($tokens);
105116
break;
106117
} else {
@@ -109,22 +120,25 @@ public function parse(tokenise $tokens) : void {
109120
}
110121

111122
case 'tagselfclose':
112-
if (in_array($tag, $this->root->getConfig('elements', 'singleton'))) {
123+
if (in_array($tag, $this->config['elements']['singleton'])) {
113124
$this->singleton = $token['value'];
114125
}
115126
break 2;
116127

117128
case 'tagclose':
118-
$close = trim($token['value'], "</ \r\n\t>");
129+
$close = mb_strtolower(trim($token['value'], "</ \r\n\t>"));
119130

120131
// if tags same, we are closing this tag, go back to parent
121-
if (strcasecmp($close, $tag) === 0) {
122-
break 2;
132+
if (in_array($close, $this->getParentTagNames())) {
123133

124-
// same as parent tag and close optional
125-
} elseif ($this->parent->tagName && strcasecmp($this->parent->tagName, $close) === 0 && in_array($tag, $this->root->getConfig('elements', 'closeoptional'))) {
126-
$this->close = false;
127-
$tokens->prev(); // close the tag on parent
134+
// when it is not our tag, pass it to the parent to handle
135+
if ($close != $tag) {
136+
$tokens->prev();
137+
138+
// otherwise we are closing ourself
139+
} else {
140+
$this->close = true;
141+
}
128142
break 2;
129143

130144
// ignore the closing tag
@@ -149,6 +163,21 @@ public function parse(tokenise $tokens) : void {
149163
}
150164
}
151165

166+
/**
167+
* Retrieves an array of all the parent tag names of this node
168+
*
169+
* @return array An array of parent tag names
170+
*/
171+
public function getParentTagNames() : array {
172+
if (!$this->parenttags) {
173+
$this->parenttags = $this->parent ? $this->parent->getParentTagNames() : [];
174+
if ($this->tagName) {
175+
$this->parenttags[] = mb_strtolower($this->tagName);
176+
}
177+
}
178+
return $this->parenttags;
179+
}
180+
152181
/**
153182
* Parses an array of tokens into an HTML document
154183
*
@@ -161,16 +190,17 @@ public function parseChildren(tokenise $tokens) : array {
161190
$children = [];
162191

163192
// process custom tags
164-
if ($parenttag && ($custom = $root->getConfig('custom', $parenttag)) !== null) {
165-
$item = new $custom['class']($root);
193+
if ($parenttag && isset($this->config['custom'][$parenttag])) {
194+
$item = new $this->config['custom'][$parenttag]['class']($root);
166195
$item->parse($tokens);
167196
$children[] = $item;
197+
$this->close = true;
168198

169199
// parse children
170-
} elseif (($token = $tokens->next()) !== null) {
200+
} else {
171201
$tag = null;
172-
$optional = $this->root->getConfig('elements', 'closeoptional');
173-
do {
202+
$optional = $this->config['elements']['closeoptional'];
203+
while (($token = $tokens->next()) !== null) {
174204
switch ($token['type']) {
175205
case 'doctype':
176206
$item = new doctype($root);
@@ -198,7 +228,7 @@ public function parseChildren(tokenise $tokens) : array {
198228
$close = trim($token['value'], "</ \r\n\t>");
199229

200230
// prevent dropping down a level when tags don't match or close is optional
201-
if ($parenttag && strcasecmp($close, $parenttag) === 0 || in_array($parenttag, $optional)) {
231+
if (in_array(mb_strtolower($close), $this->getParentTagNames())) {
202232
$tokens->prev(); // let the parent parse() method handle it
203233
break 2;
204234
}
@@ -216,7 +246,7 @@ public function parseChildren(tokenise $tokens) : array {
216246
$children[] = $item;
217247
break;
218248
}
219-
} while (($token = $tokens->next()) !== null);
249+
}
220250
}
221251
return $children;
222252
}
@@ -228,7 +258,7 @@ public function parseChildren(tokenise $tokens) : array {
228258
* @return void
229259
*/
230260
public function minify(array $minify) : void {
231-
$config = $this->root->getConfig();
261+
$config = $this->config;
232262
$attr = $config['attributes'];
233263
if ($minify['lowercase']) {
234264
$this->tagName = mb_strtolower($this->tagName);
@@ -603,7 +633,8 @@ public function html(array $options = []) : string {
603633
$tag = $this->tagName;
604634

605635
// merge output options + custom
606-
$options = array_merge($this->root->output, $options, $this->root->output['elements'][$tag] ?? []);
636+
$output = $this->config['output'];
637+
$options = array_merge($output, $options, $output['elements'][$tag] ?? []);
607638

608639
// compile attributes
609640
$html = '<'.$tag;
@@ -613,7 +644,7 @@ public function html(array $options = []) : string {
613644
$empty = in_array($value, [null, ''], true);
614645

615646
// unquoted
616-
if (!$options['xml'] && $options['quotestyle'] == 'minimal' && strcspn($value, " =\"'`<>\n\r\t/") == strlen($value)) {
647+
if (!$empty && !$options['xml'] && $options['quotestyle'] == 'minimal' && strcspn($value, " =\"'`<>\n\r\t/") == strlen($value)) {
617648
$html .= '='.$value;
618649

619650
// single quotes || swap when minimal and there are double quotes in the string
@@ -645,7 +676,7 @@ public function html(array $options = []) : string {
645676
}
646677

647678
/**
648-
* Retrieves the hild tokens as an array
679+
* Retrieves the child tokens as an array
649680
*
650681
* @return array An array of tokens
651682
*/

0 commit comments

Comments
 (0)