1- /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2- *
3- * This source is subject to the Microsoft Permissive License.
4- * Please see the License.txt file for more information.
5- * All other rights reserved.
6- *
7- * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8- * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9- * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10- * PARTICULAR PURPOSE.
11- */
12- using System . Collections . Generic ;
13- using System . Globalization ;
14- using System . Linq ;
15- using AngleSharp . Html . Dom ;
16- using DocumentFormat . OpenXml ;
17- using DocumentFormat . OpenXml . Wordprocessing ;
18-
19- namespace HtmlToOpenXml . Expressions ;
20-
/// <summary>
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
/// </summary>
/// <remarks>
/// The caption is rendered as a single paragraph made of the literal text <c>Figure </c>,
/// a <c>SEQ</c> simple field holding the figure number, and the interpreted content of the
/// <c>figcaption</c> element.
/// </remarks>
sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
{
    /// <summary>
    /// Instruction of the sequence field that numbers the figures.
    /// The format switch is written <c>\*</c>, with no space between the backslash and the star.
    /// </summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Name of the parsing context property that stores the last figure number handed out.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <inheritdoc/>
    public override IEnumerable<OpenXmlElement> Interpret(ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the children are inspected, mutated and appended below,
        // which must all operate on the very same element instances.
        var childElements = Interpret(context.CreateChild(this), node.ChildNodes).ToList();
        if (childElements.Count == 0)
        {
            return [];
        }

        var caption = new Paragraph(
            new Run(
                new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
            ),
            new SimpleField(
                new Run(
                    new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
            ) { Instruction = FigureSequenceInstruction }
        ) {
            ParagraphProperties = new ParagraphProperties {
                ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                KeepNext = new KeepNext()
            }
        };

        // Separate the figure number from the caption text with a space.
        if (childElements[0] is Run firstRun && firstRun.GetFirstChild<Text>() is Text firstText)
        {
            firstText.Text = " " + firstText.InnerText;
            // Mark the text as space-preserving so the leading space is kept.
            firstText.Space = SpaceProcessingModeValues.Preserve;
        }

        // Attach the caption content; without this the paragraph only holds "Figure <n>".
        caption.Append(childElements);

        return [caption];
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">The parsing context that carries the figure counter and the target document.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);
        if (!figCaptionRef.HasValue)
        {
            // First caption of this conversion: resume after the figure sequence
            // fields that are already present in the document.
            figCaptionRef = context.MainPart.Document
                .Descendants<SimpleField>()
                .Count(field => field.Instruction == FigureSequenceInstruction);
        }
        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }
}
1+ /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2+ *
3+ * This source is subject to the Microsoft Permissive License.
4+ * Please see the License.txt file for more information.
5+ * All other rights reserved.
6+ *
7+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10+ * PARTICULAR PURPOSE.
11+ */
12+ using System . Collections . Generic ;
13+ using System . Globalization ;
14+ using System . Linq ;
15+ using AngleSharp . Dom ;
16+ using AngleSharp . Html . Dom ;
17+ using DocumentFormat . OpenXml ;
18+ using DocumentFormat . OpenXml . Wordprocessing ;
19+
20+ namespace HtmlToOpenXml . Expressions ;
21+
/// <summary>
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
/// </summary>
/// <remarks>
/// The figure number reference (the literal text <c>Figure </c> followed by a <c>SEQ</c>
/// simple field) is placed at the start of the first caption paragraph, or in a paragraph
/// of its own when the caption does not start with a paragraph.
/// </remarks>
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
{
    /// <summary>
    /// Instruction of the sequence field that numbers the figures.
    /// The format switch is written <c>\*</c>, with no space between the backslash and the star.
    /// </summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Name of the parsing context property that stores the last figure number handed out.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <inheritdoc/>
    public override IEnumerable<OpenXmlElement> Interpret(ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the children are inspected and mutated several times below,
        // which must all operate on the very same element instances.
        var childElements = Interpret(context.CreateChild(this), node.ChildNodes).ToList();

        var figureLabel = new Run(
            new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
        );
        var figureNumber = new SimpleField(
            new Run(
                new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
        )
        { Instruction = FigureSequenceInstruction };

        // A caption without content still gets its figure number.
        if (childElements.Count == 0)
        {
            return
            [
                new Paragraph(figureLabel, figureNumber)
                {
                    ParagraphProperties = new ParagraphProperties
                    {
                        ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                        KeepNext = DetermineKeepNext(node),
                    }
                }
            ];
        }

        if (childElements[0] is Paragraph firstParagraph)
        {
            // Add the figure number reference to the start of the first paragraph.
            // Every insertion targets the same reference child, so the elements are
            // inserted in reverse reading order: separator, number, then label.
            var properties = firstParagraph.GetFirstChild<ParagraphProperties>();
            firstParagraph.InsertAfter(
                new Run(
                    new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
                ),
                properties);
            firstParagraph.InsertAfter(figureNumber, properties);
            firstParagraph.InsertAfter(figureLabel, properties);
        }
        else
        {
            // The first child of the figure caption is a table or something.
            // Just prepend a new paragraph with the figure number reference.
            childElements.Insert(0, new Paragraph(figureLabel, figureNumber));
        }

        Paragraph? lastParagraph = null;
        foreach (var paragraph in childElements.OfType<Paragraph>())
        {
            paragraph.ParagraphProperties ??= new ParagraphProperties();
            paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);
            // Keep caption paragraphs together.
            paragraph.ParagraphProperties.KeepNext = new KeepNext();
            lastParagraph = paragraph;
        }

        // Only the last caption paragraph depends on what follows the caption.
        if (lastParagraph?.ParagraphProperties is { } lastProperties)
        {
            lastProperties.KeepNext = DetermineKeepNext(node);
        }

        return childElements;
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">The parsing context that carries the figure counter and the target document.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);
        if (!figCaptionRef.HasValue)
        {
            // First caption of this conversion: resume after the figure sequence
            // fields that are already present in the document.
            figCaptionRef = context.MainPart.Document
                .Descendants<SimpleField>()
                .Count(field => field.Instruction == FigureSequenceInstruction);
        }
        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }

    /// <summary>
    /// Determines whether the KeepNext property should apply to this caption.
    /// </summary>
    /// <param name="node">The <c>figcaption</c> element being converted.</param>
    /// <returns>
    /// A new <see cref="KeepNext"/> when the caption is followed by a sibling element;
    /// otherwise <see langword="null"/>.
    /// </returns>
    private static KeepNext? DetermineKeepNext(IHtmlElement node)
    {
        // A caption at the end of a figure will have no next sibling.
        return node.NextElementSibling is null ? null : new KeepNext();
    }
}
0 commit comments