1- /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2- *
3- * This source is subject to the Microsoft Permissive License.
4- * Please see the License.txt file for more information.
5- * All other rights reserved.
6- *
7- * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8- * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9- * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10- * PARTICULAR PURPOSE.
11- */
12- using System . Collections . Generic ;
13- using System . Globalization ;
14- using System . Linq ;
15- using AngleSharp . Html . Dom ;
16- using DocumentFormat . OpenXml ;
17- using DocumentFormat . OpenXml . Wordprocessing ;
18-
19- namespace HtmlToOpenXml . Expressions ;
20-
/// <summary>
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
/// </summary>
/// <remarks>
/// The caption is rendered as a single paragraph made of the text <c>Figure </c>,
/// a <c>SEQ</c> field holding the figure number, and then the caption content.
/// </remarks>
sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
{
    /// <summary>
    /// Field instruction of the figure number. The very same text is used to
    /// recognise the figure captions that already exist in the document.
    /// </summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Name of the parsing context property that stores the last figure number used.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <inheritdoc/>
    public override IEnumerable<OpenXmlElement> Interpret(ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the sequence is inspected several times below, and the run
        // that gets its text adjusted must be the same instance that ends up in the paragraph.
        var childElements = Interpret(context.CreateChild(this), node.ChildNodes).ToList();
        if (childElements.Count == 0)
        {
            return [];
        }

        var p = new Paragraph(
            new Run(
                new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
            ),
            new SimpleField(
                new Run(
                    new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
            ) { Instruction = FigureSequenceInstruction }
        ) {
            ParagraphProperties = new ParagraphProperties {
                ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                KeepNext = new KeepNext()
            }
        };

        if (childElements[0] is Run run) // any caption?
        {
            Text? t = run.GetFirstChild<Text>();
            if (t is not null)
            {
                // Separate the figure number from the caption text. The leading
                // space is only kept when the text is flagged as space-preserving.
                t.Text = " " + t.InnerText;
                t.Space = SpaceProcessingModeValues.Preserve;
            }
        }

        // The caption content belongs right after the figure number, in the same paragraph.
        p.Append(childElements);

        return [p];
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">Parsing context that stores the running figure counter.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);

        // No counter stored yet: start from the number of figure sequence fields
        // that are already present in the document.
        figCaptionRef ??= context.MainPart.Document
            .Descendants<SimpleField>()
            .Count(field => field.Instruction == FigureSequenceInstruction);

        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }
}
1+ /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2+ *
3+ * This source is subject to the Microsoft Permissive License.
4+ * Please see the License.txt file for more information.
5+ * All other rights reserved.
6+ *
7+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10+ * PARTICULAR PURPOSE.
11+ */
12+ using System . Collections . Generic ;
13+ using System . Globalization ;
14+ using System . Linq ;
15+ using AngleSharp . Html . Dom ;
16+ using DocumentFormat . OpenXml ;
17+ using DocumentFormat . OpenXml . Wordprocessing ;
18+
19+ namespace HtmlToOpenXml . Expressions ;
20+
/// <summary>
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
/// </summary>
/// <remarks>
/// The caption is rendered as block content. The text <c>Figure </c> followed by a
/// <c>SEQ</c> field holding the figure number is placed at the start of the first
/// paragraph, or in a paragraph of its own when the caption does not start with one.
/// </remarks>
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
{
    /// <summary>
    /// Field instruction of the figure number. The very same text is used to
    /// recognise the figure captions that already exist in the document.
    /// </summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Name of the parsing context property that stores the last figure number used.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <inheritdoc/>
    public override IEnumerable<OpenXmlElement> Interpret(ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the sequence is inspected several times below, and the
        // paragraphs modified here must be the same instances that are returned.
        var childElements = Interpret(context.CreateChild(this), node.ChildNodes).ToList();

        var label = new Run(
            new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
        );
        var number = new SimpleField(
            new Run(
                new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
        ) { Instruction = FigureSequenceInstruction };

        if (childElements.Count == 0)
        {
            // Caption without content: only "Figure N" is emitted.
            return [new Paragraph(label, number) {
                ParagraphProperties = new ParagraphProperties {
                    ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                    KeepNext = DetermineKeepNext(node),
                }
            }];
        }

        // Add the figure number reference to the start of the first paragraph.
        if (childElements[0] is Paragraph firstParagraph)
        {
            // Every element is inserted right behind the paragraph properties, hence the
            // reverse order: the paragraph ends up reading "Figure ", number, " ", caption.
            var properties = firstParagraph.GetFirstChild<ParagraphProperties>();
            firstParagraph.InsertAfter(new Run(
                new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
            ), properties);
            firstParagraph.InsertAfter(number, properties);
            firstParagraph.InsertAfter(label, properties);
        }
        else
        {
            // The first child of the figure caption is a table or something.
            // Just prepend a new paragraph with the figure number reference.
            childElements.Insert(0, new Paragraph(label, number));
        }

        // At this point there is always at least one paragraph: either the caption
        // started with one, or one was prepended just above.
        var paragraphs = childElements.OfType<Paragraph>().ToList();
        foreach (var paragraph in paragraphs)
        {
            paragraph.ParagraphProperties ??= new ParagraphProperties();
            // A style already set on the paragraph wins over the caption style.
            paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);
            // Keep caption paragraphs together.
            paragraph.ParagraphProperties.KeepNext = new KeepNext();
        }

        // Whether the last paragraph sticks to what follows depends on the caption position.
        paragraphs[^1].ParagraphProperties!.KeepNext = DetermineKeepNext(node);

        return childElements;
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">Parsing context that stores the running figure counter.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);

        // No counter stored yet: start from the number of figure sequence fields
        // that are already present in the document.
        figCaptionRef ??= context.MainPart.Document
            .Descendants<SimpleField>()
            .Count(field => field.Instruction == FigureSequenceInstruction);

        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }

    /// <summary>
    /// Determines whether the KeepNext property should apply to this caption.
    /// </summary>
    /// <param name="node">The <c>figcaption</c> element being processed.</param>
    /// <returns>A new <see cref="KeepNext"/> or null.</returns>
    private static KeepNext? DetermineKeepNext(IHtmlElement node)
    {
        // A caption at the end of a figure will have no next sibling.
        return node.NextElementSibling is null ? null : new KeepNext();
    }
}
0 commit comments