Skip to content

Commit 85bba83

Browse files
committed
Bump openhtmltopdf 1.1.37, pdfbox 3.0.7, jsoup 1.22.1
Allow self-closing unknown HTML tags for compatibility with real-world HTML
1 parent c34ea67 commit 85bba83

4 files changed

Lines changed: 24 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ Major rewrite switching from Flying Saucer/iText to OpenHTMLToPDF + PDFBox 3.x.
1414

1515
| Component | Version | Purpose |
1616
|-----------|---------|---------|
17-
| OpenHTMLToPDF | 1.1.24 | HTML/CSS to PDF rendering |
18-
| PDFBox | 3.0.5 | PDF manipulation (cfpdf actions) |
19-
| jsoup | 1.18.3 | HTML parsing/cleanup |
17+
| OpenHTMLToPDF | 1.1.37 | HTML/CSS to PDF rendering |
18+
| PDFBox | 3.0.7 | PDF manipulation (cfpdf actions) |
19+
| jsoup | 1.22.1 | HTML parsing/cleanup |
2020

2121
### New cfpdf Actions
2222

@@ -183,6 +183,8 @@ The `scale` attribute (1-100) now works, rendering content at the specified perc
183183
- **No License Issues**: All open source libraries (Apache 2.0, LGPL)
184184
- **Better CSS Support**: CSS 2.1 with some CSS3 support via OpenHTMLToPDF
185185
- **Modern PDFBox**: PDFBox 3.x with improved performance and security
186+
- **Self-closing HTML tags**: Unknown/custom tags written in self-closing form (e.g. `<my-tag/>`) are accepted as self-closed, for compatibility with real-world HTML
187+
- **Removed legacy v2 dist jars**: Cleaned out old iText, Flying Saucer, TagSoup, and pre-release PDFBox jars
186188

187189
### Removed Features
188190

source/java/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232

3333
<properties>
3434
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
35-
<openhtmltopdf.version>1.1.24</openhtmltopdf.version>
36-
<pdfbox.version>3.0.5</pdfbox.version>
35+
<openhtmltopdf.version>1.1.37</openhtmltopdf.version>
36+
<pdfbox.version>3.0.7</pdfbox.version>
3737
</properties>
3838

3939
<build>
@@ -226,7 +226,7 @@
226226
<dependency>
227227
<groupId>org.jsoup</groupId>
228228
<artifactId>jsoup</artifactId>
229-
<version>1.18.3</version>
229+
<version>1.22.1</version>
230230
</dependency>
231231

232232
<!-- Lucee (provided - not bundled) -->

source/java/src/org/lucee/extension/pdf/PDFDocument.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
import org.jsoup.Jsoup;
4141
import org.jsoup.helper.W3CDom;
42+
import org.jsoup.parser.Parser;
43+
import org.jsoup.parser.Tag;
4244
import org.w3c.dom.Document;
4345

4446
import com.openhtmltopdf.outputdevice.helper.BaseRendererBuilder.PageSizeUnits;
@@ -430,8 +432,12 @@ public byte[] render(Dimension dimension, double unitFactor, PageContext pc, boo
430432
String baseUrl = getBaseUrl(pc);
431433

432434
// Parse HTML with JSoup and convert to W3C DOM
433-
org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(html);
434-
jsoupDoc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
435+
Parser parser = Parser.htmlParser();
436+
parser.tagSet().onNewTag( tag -> {
437+
if ( !tag.isKnownTag() ) tag.set( Tag.SelfClose );
438+
});
439+
org.jsoup.nodes.Document jsoupDoc = Jsoup.parse( html, parser );
440+
jsoupDoc.outputSettings().syntax( org.jsoup.nodes.Document.OutputSettings.Syntax.xml );
435441

436442
// Convert local file paths to file:// URIs for OpenHTMLToPDF compatibility
437443
convertLocalPathsToURIs(jsoupDoc);

source/java/src/org/lucee/extension/pdf/util/XMLUtil.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
import org.jsoup.Jsoup;
1616
import org.jsoup.helper.W3CDom;
17+
import org.jsoup.parser.Parser;
18+
import org.jsoup.parser.Tag;
1719
import org.w3c.dom.Document;
1820
import org.w3c.dom.Element;
1921
import org.w3c.dom.Node;
@@ -100,8 +102,12 @@ else if (xml.getSystemId() != null) {
100102
}
101103

102104
// Parse with jsoup and convert to W3C DOM
103-
org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(html);
104-
jsoupDoc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
105+
Parser parser = Parser.htmlParser();
106+
parser.tagSet().onNewTag( tag -> {
107+
if ( !tag.isKnownTag() ) tag.set( Tag.SelfClose );
108+
});
109+
org.jsoup.nodes.Document jsoupDoc = Jsoup.parse( html, parser );
110+
jsoupDoc.outputSettings().syntax( org.jsoup.nodes.Document.OutputSettings.Syntax.xml );
105111
W3CDom w3cDom = new W3CDom();
106112
return w3cDom.fromJsoup(jsoupDoc);
107113
}

0 commit comments

Comments
 (0)