Skip to content

Commit 03b5fb1

Browse files
authored
Release 3.2.5
Release 3.2.5
2 parents 8592c53 + d4174a6 commit 03b5fb1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1288
-744
lines changed

.vscode/settings.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
2-
"omnisharp.organizeImportsOnFormat": true,
32
"dotnet.completion.showCompletionItemsFromUnimportedNamespaces": false,
43
"coverage-gutters.coverageFileNames":[
54
"coverage.info"
65
],
76
"coverage-gutters.showGutterCoverage": false,
8-
"coverage-gutters.showLineCoverage": true
7+
"coverage-gutters.showLineCoverage": true,
8+
"dotnet.formatting.organizeImportsOnFormat": true
99
}

CHANGELOG.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,34 @@
11
# Changelog
22

3+
## 3.2.5
4+
5+
- Fix a crash with the new whitespace handling introduced in 3.2.3 #191
6+
- Fix a crash when the HTML contains two images with the same source path #193
7+
- Support margin auto for table alignment #194
8+
- Fix handling whitespace between runs #195
9+
- Whitelist more mime-types as specified by the IANA standard #196
10+
- Support EMF files #196
11+
- Correct handling of `figcaption` (allow nested phrasings) #197
12+
- Numbering list now supports type attribute `<ol type="1|a|A|i|I">` #198
13+
- Always restart nested numbering list #198
14+
- Fix table borders being removed even when the specified word table style has borders #199
15+
- Add defensive handling when a downloaded image stream is truncated #201
16+
- Table inside list is constrained to not exceed page margin #202
17+
- Table now supports width:auto for auto-fit content #202
18+
19+
## 3.2.4
20+
21+
- Fix a crash with the new whitespace handling introduced in 3.2.3 #191
22+
- Table inside list must be aligned with the list item #192
23+
24+
## 3.2.3
25+
26+
- Improve support of table alignment #187
27+
- Fix a crash if a span is empty
28+
- Heading with only digits should not be considered as a numbering #189
29+
- Fix whitespace inserted between spans #179 and #185
30+
- Support percentage size (typically width:100%) for img node #188
31+
332
## 3.2.2
433

534
- Supports a feature to disable heading numbering #175

examples/Demo/Demo.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
<ItemGroup>
2323
<EmbeddedResource Include="Resources\*" />
24+
<Content Include="images\*" CopyToOutputDirectory="PreserveNewest"/>
2425
</ItemGroup>
2526

2627
</Project>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<img src="The-Song-of-the-World.jpg" style="max-width:100%"/>

examples/Demo/app.config

Lines changed: 0 additions & 13 deletions
This file was deleted.
254 KB
Loading

src/Html2OpenXml/Expressions/BlockElementExpression.cs

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class BlockElementExpression: PhrasingElementExpression
2727
{
2828
private readonly OpenXmlLeafElement[]? defaultStyleProperties;
2929
protected readonly ParagraphProperties paraProperties = new();
30+
protected TableProperties? tableProperties;
3031
// some style attributes, such as borders or bgcolor, will convert this node to a framed container
3132
protected bool renderAsFramed;
3233
private HtmlBorder styleBorder;
@@ -115,22 +116,44 @@ protected override IEnumerable<OpenXmlElement> Interpret (
115116
public override void CascadeStyles(OpenXmlElement element)
116117
{
117118
base.CascadeStyles(element);
118-
if (!paraProperties.HasChildren || element is not Paragraph paragraph)
119+
if (!paraProperties.HasChildren)
119120
return;
120121

121-
paragraph.ParagraphProperties ??= new ParagraphProperties();
122-
123-
var knownTags = new HashSet<string>();
124-
foreach (var prop in paragraph.ParagraphProperties)
122+
if (element is Paragraph paragraph)
125123
{
126-
if (!knownTags.Contains(prop.LocalName))
127-
knownTags.Add(prop.LocalName);
128-
}
124+
paragraph.ParagraphProperties ??= new ParagraphProperties();
129125

130-
foreach (var prop in paraProperties)
126+
var knownTags = new HashSet<string>();
127+
foreach (var prop in paragraph.ParagraphProperties)
128+
{
129+
if (!knownTags.Contains(prop.LocalName))
130+
knownTags.Add(prop.LocalName);
131+
}
132+
133+
foreach (var prop in paraProperties)
134+
{
135+
if (!knownTags.Contains(prop.LocalName))
136+
paragraph.ParagraphProperties.AddChild(prop.CloneNode(true));
137+
}
138+
}
139+
else if (tableProperties != null && element is Table table)
131140
{
132-
if (!knownTags.Contains(prop.LocalName))
133-
paragraph.ParagraphProperties.AddChild(prop.CloneNode(true));
141+
var props = table.GetFirstChild<TableProperties>();
142+
if (props is null)
143+
return;
144+
145+
var knownTags = new HashSet<string>();
146+
foreach (var prop in props)
147+
{
148+
if (!knownTags.Contains(prop.LocalName))
149+
knownTags.Add(prop.LocalName);
150+
}
151+
152+
foreach (var prop in tableProperties)
153+
{
154+
if (!knownTags.Contains(prop.LocalName))
155+
props.AddChild(prop.CloneNode(true));
156+
}
134157
}
135158
}
136159

@@ -170,9 +193,12 @@ protected override void ComposeStyles (ParsingContext context)
170193

171194
JustificationValues? align = Converter.ToParagraphAlign(styleAttributes!["text-align"]);
172195
if (!align.HasValue) align = Converter.ToParagraphAlign(node.GetAttribute("align"));
196+
if (!align.HasValue) align = Converter.ToParagraphAlign(styleAttributes["justify-content"]);
173197
if (align.HasValue)
174198
{
175199
paraProperties.Justification = new() { Val = align };
200+
tableProperties ??= new();
201+
tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() };
176202
}
177203

178204

@@ -194,7 +220,7 @@ protected override void ComposeStyles (ParsingContext context)
194220
}
195221

196222
var margin = styleAttributes.GetMargin("margin");
197-
Indentation? indentation = null;
223+
Indentation? indentation = null;
198224
if (!margin.IsEmpty)
199225
{
200226
if (margin.Top.IsFixed || margin.Bottom.IsFixed)
@@ -345,7 +371,7 @@ private static Paragraph CreateParagraph(ParsingContext context, IList<OpenXmlEl
345371

346372
context.CascadeStyles(p);
347373

348-
p.Append(CombineRuns(runs));
374+
p.Append(runs);
349375

350376
// in Html, if a paragraph is ending with a line break, it is ignored
351377
if (p.LastChild is Run run && run.LastChild is Break lineBreak)
Lines changed: 125 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,125 @@
1-
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2-
*
3-
* This source is subject to the Microsoft Permissive License.
4-
* Please see the License.txt file for more information.
5-
* All other rights reserved.
6-
*
7-
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8-
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9-
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10-
* PARTICULAR PURPOSE.
11-
*/
12-
using System.Collections.Generic;
13-
using System.Globalization;
14-
using System.Linq;
15-
using AngleSharp.Html.Dom;
16-
using DocumentFormat.OpenXml;
17-
using DocumentFormat.OpenXml.Wordprocessing;
18-
19-
namespace HtmlToOpenXml.Expressions;
20-
21-
/// <summary>
22-
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
23-
/// </summary>
24-
sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
25-
{
26-
27-
/// <inheritdoc/>
28-
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
29-
{
30-
ComposeStyles(context);
31-
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
32-
if (!childElements.Any())
33-
return [];
34-
35-
var p = new Paragraph (
36-
new Run(
37-
new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
38-
),
39-
new SimpleField(
40-
new Run(
41-
new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
42-
) { Instruction = " SEQ Figure \\* ARABIC " }
43-
) {
44-
ParagraphProperties = new ParagraphProperties {
45-
ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
46-
KeepNext = new KeepNext()
47-
}
48-
};
49-
50-
if (childElements.First() is Run run) // any caption?
51-
{
52-
Text? t = run.GetFirstChild<Text>();
53-
if (t != null)
54-
t.Text = " " + t.InnerText; // append a space after the numero of the picture
55-
}
56-
57-
return [p];
58-
}
59-
60-
/// <summary>
61-
/// Add a new figure caption to the document.
62-
/// </summary>
63-
/// <returns>Returns the id of the new figure caption.</returns>
64-
private static int AddFigureCaption(ParsingContext context)
65-
{
66-
var figCaptionRef = context.Properties<int?>("figCaptionRef");
67-
if (!figCaptionRef.HasValue)
68-
{
69-
figCaptionRef = 0;
70-
foreach (var p in context.MainPart.Document.Descendants<SimpleField>())
71-
{
72-
if (p.Instruction == " SEQ Figure \\* ARABIC ")
73-
figCaptionRef++;
74-
}
75-
}
76-
figCaptionRef++;
77-
78-
context.Properties("figCaptionRef", figCaptionRef);
79-
return figCaptionRef.Value;
80-
}
81-
}
1+
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2+
*
3+
* This source is subject to the Microsoft Permissive License.
4+
* Please see the License.txt file for more information.
5+
* All other rights reserved.
6+
*
7+
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8+
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10+
* PARTICULAR PURPOSE.
11+
*/
12+
using System.Collections.Generic;
13+
using System.Globalization;
14+
using System.Linq;
15+
using AngleSharp.Html.Dom;
16+
using DocumentFormat.OpenXml;
17+
using DocumentFormat.OpenXml.Wordprocessing;
18+
19+
namespace HtmlToOpenXml.Expressions;
20+
21+
/// <summary>
22+
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
23+
/// </summary>
24+
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
25+
{
26+
27+
/// <inheritdoc/>
28+
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
29+
{
30+
ComposeStyles(context);
31+
var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
32+
33+
var figNumRef = new List<OpenXmlElement>() {
34+
new Run(
35+
new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
36+
),
37+
new SimpleField(
38+
new Run(
39+
new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
40+
) { Instruction = " SEQ Figure \\* ARABIC " }
41+
};
42+
43+
44+
if (!childElements.Any())
45+
{
46+
return [new Paragraph(figNumRef) {
47+
ParagraphProperties = new ParagraphProperties {
48+
ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
49+
KeepNext = DetermineKeepNext(node),
50+
}
51+
}];
52+
}
53+
54+
//Add the figure number references to the start of the first paragraph.
55+
if(childElements.FirstOrDefault() is Paragraph p)
56+
{
57+
var properties = p.GetFirstChild<ParagraphProperties>();
58+
p.InsertAfter(new Run(
59+
new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
60+
), properties);
61+
p.InsertAfter(figNumRef[1], properties);
62+
p.InsertAfter(figNumRef[0], properties);
63+
}
64+
else
65+
{
66+
// The first child of the figure caption is not a paragraph (for example, a table).
67+
// Just prepend a new paragraph with the figure number reference.
68+
childElements = [
69+
new Paragraph(figNumRef),
70+
..childElements
71+
];
72+
}
73+
74+
foreach (var paragraph in childElements.OfType<Paragraph>())
75+
{
76+
paragraph.ParagraphProperties ??= new ParagraphProperties();
77+
paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);
78+
//Keep caption paragraphs together.
79+
paragraph.ParagraphProperties.KeepNext = new KeepNext();
80+
}
81+
82+
if(childElements.OfType<Paragraph>().LastOrDefault() is Paragraph lastPara)
83+
{
84+
lastPara.ParagraphProperties!.KeepNext = DetermineKeepNext(node);
85+
}
86+
87+
return childElements;
88+
}
89+
90+
/// <summary>
91+
/// Add a new figure caption to the document.
92+
/// </summary>
93+
/// <returns>Returns the id of the new figure caption.</returns>
94+
private static int AddFigureCaption(ParsingContext context)
95+
{
96+
var figCaptionRef = context.Properties<int?>("figCaptionRef");
97+
if (!figCaptionRef.HasValue)
98+
{
99+
figCaptionRef = 0;
100+
foreach (var p in context.MainPart.Document.Descendants<SimpleField>())
101+
{
102+
if (p.Instruction == " SEQ Figure \\* ARABIC ")
103+
figCaptionRef++;
104+
}
105+
}
106+
figCaptionRef++;
107+
108+
context.Properties("figCaptionRef", figCaptionRef);
109+
return figCaptionRef.Value;
110+
}
111+
112+
/// <summary>
113+
/// Determines whether the KeepNext property should apply to this caption.
114+
/// </summary>
115+
/// <returns>A new <see cref="KeepNext"/> or null.</returns>
116+
private static KeepNext? DetermineKeepNext(IHtmlElement node)
117+
{
118+
// A caption at the end of a figure will have no next sibling.
119+
if(node.NextElementSibling is null)
120+
{
121+
return null;
122+
}
123+
return new();
124+
}
125+
}

0 commit comments

Comments
 (0)