Skip to content

Commit 2acd68c

Browse files
Merge branch 'docling-project:main' into main
2 parents 60494d6 + e00735d commit 2acd68c

12 files changed

Lines changed: 860 additions & 328 deletions

File tree

docling/backend/docx/latex/latex_dict.py

Lines changed: 118 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,87 @@
1-
"""
2-
Adapted from https://github.com/xiilei/dwml/blob/master/dwml/latex_dict.py
3-
On 23/01/2025
1+
"""LaTeX dictionary for OMML to LaTeX conversion.
2+
3+
This module contains constants and dictionaries used for converting Office Math
4+
Markup Language (OMML) to LaTeX format. It includes mappings for special characters,
5+
mathematical symbols, functions, and formatting templates.
6+
7+
Adapted from https://github.com/xiilei/dwml/blob/master/dwml/latex_dict.py on 23/01/2025
48
"""
59

6-
CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~")
10+
from typing import Final
711

8-
BLANK = ""
9-
BACKSLASH = "\\"
10-
ALN = "&"
12+
CHARS: Final[tuple[str, ...]] = ("{", "}", "_", "^", "#", "&", "$", "%", "~")
1113

12-
CHR = {
14+
BLANK: Final[str] = ""
15+
BACKSLASH: Final[str] = "\\"
16+
ALN: Final[str] = "&"
17+
18+
# Characters that indicate mathematical expressions (not plain text)
19+
# Used to detect when spaces should be escaped in limit labels
20+
MATH_CHARS: Final[tuple[str, ...]] = (
21+
BACKSLASH,
22+
"<",
23+
">",
24+
"=",
25+
"+",
26+
"*",
27+
"/",
28+
"^",
29+
"_",
30+
"{",
31+
"}",
32+
)
33+
34+
CHR: Final[dict[str, str]] = {
1335
# Unicode : Latex Math Symbols
1436
# Top accents
15-
"\u0300": "\\grave{{{0}}}",
16-
"\u0301": "\\acute{{{0}}}",
17-
"\u0302": "\\hat{{{0}}}",
18-
"\u0303": "\\tilde{{{0}}}",
19-
"\u0304": "\\bar{{{0}}}",
20-
"\u0305": "\\overbar{{{0}}}",
21-
"\u0306": "\\breve{{{0}}}",
22-
"\u0307": "\\dot{{{0}}}",
23-
"\u0308": "\\ddot{{{0}}}",
24-
"\u0309": "\\ovhook{{{0}}}",
25-
"\u030a": "\\ocirc{{{0}}}}",
26-
"\u030c": "\\check{{{0}}}}",
27-
"\u0310": "\\candra{{{0}}}",
28-
"\u0312": "\\oturnedcomma{{{0}}}",
29-
"\u0315": "\\ocommatopright{{{0}}}",
30-
"\u031a": "\\droang{{{0}}}",
31-
"\u0338": "\\not{{{0}}}",
32-
"\u20d0": "\\leftharpoonaccent{{{0}}}",
33-
"\u20d1": "\\rightharpoonaccent{{{0}}}",
34-
"\u20d2": "\\vertoverlay{{{0}}}",
35-
"\u20d6": "\\overleftarrow{{{0}}}",
36-
"\u20d7": "\\vec{{{0}}}",
37-
"\u20db": "\\dddot{{{0}}}",
38-
"\u20dc": "\\ddddot{{{0}}}",
39-
"\u20e1": "\\overleftrightarrow{{{0}}}",
40-
"\u20e7": "\\annuity{{{0}}}",
41-
"\u20e9": "\\widebridgeabove{{{0}}}",
42-
"\u20f0": "\\asteraccent{{{0}}}",
37+
"\u0300": "\\grave{%s}",
38+
"\u0301": "\\acute{%s}",
39+
"\u0302": "\\hat{%s}",
40+
"\u0303": "\\tilde{%s}",
41+
"\u0304": "\\bar{%s}",
42+
"\u0305": "\\overbar{%s}",
43+
"\u0306": "\\breve{%s}",
44+
"\u0307": "\\dot{%s}",
45+
"\u0308": "\\ddot{%s}",
46+
"\u0309": "\\ovhook{%s}",
47+
"\u030a": "\\ocirc{%s}",
48+
"\u030c": "\\check{%s}",
49+
"\u0310": "\\candra{%s}",
50+
"\u0312": "\\oturnedcomma{%s}",
51+
"\u0315": "\\ocommatopright{%s}",
52+
"\u031a": "\\droang{%s}",
53+
"\u0338": "\\not{%s}",
54+
"\u20d0": "\\leftharpoonaccent{%s}",
55+
"\u20d1": "\\rightharpoonaccent{%s}",
56+
"\u20d2": "\\vertoverlay{%s}",
57+
"\u20d6": "\\overleftarrow{%s}",
58+
"\u20d7": "\\vec{%s}",
59+
"\u20db": "\\dddot{%s}",
60+
"\u20dc": "\\ddddot{%s}",
61+
"\u20e1": "\\overleftrightarrow{%s}",
62+
"\u20e7": "\\annuity{%s}",
63+
"\u20e9": "\\widebridgeabove{%s}",
64+
"\u20f0": "\\asteraccent{%s}",
4365
# Bottom accents
44-
"\u0330": "\\wideutilde{{{0}}}",
45-
"\u0331": "\\underbar{{{0}}}",
46-
"\u20e8": "\\threeunderdot{{{0}}}",
47-
"\u20ec": "\\underrightharpoondown{{{0}}}",
48-
"\u20ed": "\\underleftharpoondown{{{0}}}",
49-
"\u20ee": "\\underledtarrow{{{0}}}",
50-
"\u20ef": "\\underrightarrow{{{0}}}",
66+
"\u0330": "\\wideutilde{%s}",
67+
"\u0331": "\\underbar{%s}",
68+
"\u20e8": "\\threeunderdot{%s}",
69+
"\u20ec": "\\underrightharpoondown{%s}",
70+
"\u20ed": "\\underleftharpoondown{%s}",
71+
"\u20ee": "\\underleftarrow{%s}",
72+
"\u20ef": "\\underrightarrow{%s}",
5173
# Over | group
52-
"\u23b4": "\\overbracket{{{0}}}",
53-
"\u23dc": "\\overparen{{{0}}}",
54-
"\u23de": "\\overbrace{{{0}}}",
74+
"\u23b4": "\\overbracket{%s}",
75+
"\u23dc": "\\overparen{%s}",
76+
"\u23de": "\\overbrace{%s}",
5577
# Under| group
56-
"\u23b5": "\\underbracket{{{0}}}",
57-
"\u23dd": "\\underparen{{{0}}}",
58-
"\u23df": "\\underbrace{{{0}}}",
78+
"\u23b5": "\\underbracket{%s}",
79+
"\u23dd": "\\underparen{%s}",
80+
"\u23df": "\\underbrace{%s}",
5981
}
6082

61-
CHR_BO = {
62-
# Big operators,
83+
CHR_BO: Final[dict[str, str]] = {
84+
# Big operators
6385
"\u2140": "\\Bbbsum",
6486
"\u220f": "\\prod",
6587
"\u2210": "\\coprod",
@@ -79,7 +101,7 @@
79101
"\u2a02": "\\bigotimes",
80102
}
81103

82-
T = {
104+
T: Final[dict[str, str]] = {
83105
# Greek letters
84106
"\U0001d6fc": "\\alpha ",
85107
"\U0001d6fd": "\\beta ",
@@ -201,7 +223,7 @@
201223
"\U0001d467": "z",
202224
}
203225

204-
FUNC = {
226+
FUNC: Final[dict[str, str]] = {
205227
"sin": "\\sin({fe})",
206228
"cos": "\\cos({fe})",
207229
"tan": "\\tan({fe})",
@@ -234,57 +256,69 @@
234256
"Pr": "\\Pr({fe})",
235257
}
236258

237-
FUNC_PLACE = "{fe}"
259+
FUNC_PLACE: Final[str] = "{fe}"
238260

239-
BRK = "\\\\"
261+
BRK: Final[str] = "\\\\"
240262

241-
CHR_DEFAULT = {
242-
"ACC_VAL": "\\hat{{{0}}}",
263+
CHR_DEFAULT: Final[dict[str, str]] = {
264+
"ACC_VAL": "\\hat{%s}",
265+
"GROUPCHR_VAL": "\\underbrace{%s}",
243266
}
244267

245-
POS = {
246-
"top": "\\overline{{{0}}}", # not sure
247-
"bot": "\\underline{{{0}}}",
268+
# Grouping functions that can have subscripts/superscripts
269+
# These are bracket/brace functions, not limit functions
270+
GROUPING_FUNCS: Final[tuple[str, ...]] = (
271+
"\\underbrace",
272+
"\\overbrace",
273+
"\\underparen",
274+
"\\overparen",
275+
"\\underbracket",
276+
"\\overbracket",
277+
)
278+
279+
POS: Final[dict[str, str]] = {
280+
"top": "\\overline{%s}",
281+
"bot": "\\underline{%s}",
248282
}
249283

250-
POS_DEFAULT = {
251-
"BAR_VAL": "\\overline{{{0}}}",
284+
POS_DEFAULT: Final[dict[str, str]] = {
285+
"BAR_VAL": "\\overline{%s}",
252286
}
253287

254-
SUB = "_{{{0}}}"
288+
SUB: Final[str] = "_{%s}"
255289

256-
SUP = "^{{{0}}}"
290+
SUP: Final[str] = "^{%s}"
257291

258-
F = {
259-
"bar": "\\frac{{{num}}}{{{den}}}",
260-
"skw": r"^{{{num}}}/_{{{den}}}",
261-
"noBar": "\\genfrac{{}}{{}}{{0pt}}{{}}{{{num}}}{{{den}}}",
262-
"lin": "{{{num}}}/{{{den}}}",
292+
F: Final[dict[str, str]] = {
293+
"bar": "\\frac{%(num)s}{%(den)s}",
294+
"skw": r"^{%(num)s}/_{%(den)s}",
295+
"noBar": "\\genfrac{}{}{0pt}{}{%(num)s}{%(den)s}",
296+
"lin": "{%(num)s}/{%(den)s}",
263297
}
264-
F_DEFAULT = "\\frac{{{num}}}{{{den}}}"
298+
F_DEFAULT: Final[str] = "\\frac{%(num)s}{%(den)s}"
265299

266-
D = "\\left{left}{text}\\right{right}"
300+
D: Final[str] = "\\left%(left)s%(text)s\\right%(right)s"
267301

268-
D_DEFAULT = {
302+
D_DEFAULT: Final[dict[str, str]] = {
269303
"left": "(",
270304
"right": ")",
271305
"null": ".",
272306
}
273307

274-
RAD = "\\sqrt[{deg}]{{{text}}}"
275-
RAD_DEFAULT = "\\sqrt{{{text}}}"
276-
ARR = "{text}"
308+
RAD: Final[str] = "\\sqrt[%(deg)s]{%(text)s}"
309+
RAD_DEFAULT: Final[str] = "\\sqrt{%(text)s}"
310+
ARR: Final[str] = "%(text)s"
277311

278-
LIM_FUNC = {
279-
"lim": "\\lim_{{{lim}}}",
280-
"max": "\\max_{{{lim}}}",
281-
"min": "\\min_{{{lim}}}",
282-
"argmax": "\\operatorname{{argmax}}_{{{lim}}}",
283-
"argmin": "\\operatorname{{argmin}}_{{{lim}}}",
312+
LIM_FUNC: Final[dict[str, str]] = {
313+
"lim": "\\lim_{%(lim)s}",
314+
"max": "\\max_{%(lim)s}",
315+
"min": "\\min_{%(lim)s}",
316+
"argmax": "\\operatorname{argmax}_{%(lim)s}",
317+
"argmin": "\\operatorname{argmin}_{%(lim)s}",
284318
}
285319

286-
LIM_TO = ("\\rightarrow", "\\to")
320+
LIM_TO: Final[tuple[str, str]] = ("\\rightarrow", "\\to")
287321

288-
LIM_UPP = "\\overset{{{lim}}}{{{text}}}"
322+
LIM_UPP: Final[str] = "\\overset{%(lim)s}{%(text)s}"
289323

290-
M = "\\begin{{matrix}}{text}\\end{{matrix}}"
324+
M: Final[str] = "\\begin{matrix}%(text)s\\end{matrix}"

0 commit comments

Comments
 (0)