diff --git a/novelwriter/assets/i18n/project_en_GB.json b/novelwriter/assets/i18n/project_en_GB.json index 19cdda1ef..25de785c2 100644 --- a/novelwriter/assets/i18n/project_en_GB.json +++ b/novelwriter/assets/i18n/project_en_GB.json @@ -15,6 +15,7 @@ "Entities": "Entities", "Custom": "Custom", "New Page": "New Page", + "Contents": "Contents", "0": "Zero", "1": "One", "2": "Two", diff --git a/novelwriter/constants.py b/novelwriter/constants.py index 9f4c0e1e4..6d9ccf99f 100644 --- a/novelwriter/constants.py +++ b/novelwriter/constants.py @@ -354,6 +354,7 @@ class nwLabels: nwBuildFmt.ODT: QT_TRANSLATE_NOOP("Constant", "Open Document (.odt)"), nwBuildFmt.FODT: QT_TRANSLATE_NOOP("Constant", "Flat Open Document (.fodt)"), nwBuildFmt.DOCX: QT_TRANSLATE_NOOP("Constant", "Microsoft Word Document (.docx)"), + nwBuildFmt.EPUB: QT_TRANSLATE_NOOP("Constant", "Electronic Publication E-book (.epub)"), nwBuildFmt.HTML: QT_TRANSLATE_NOOP("Constant", "HTML 5 (.html)"), nwBuildFmt.NWD: QT_TRANSLATE_NOOP("Constant", "novelWriter Markup (.txt)"), nwBuildFmt.STD_MD: QT_TRANSLATE_NOOP("Constant", "Standard Markdown (.md)"), @@ -366,6 +367,7 @@ class nwLabels: nwBuildFmt.ODT: ".odt", nwBuildFmt.FODT: ".fodt", nwBuildFmt.DOCX: ".docx", + nwBuildFmt.EPUB: ".epub", nwBuildFmt.HTML: ".html", nwBuildFmt.NWD: ".txt", nwBuildFmt.STD_MD: ".md", diff --git a/novelwriter/core/docbuild.py b/novelwriter/core/docbuild.py index 0b10bfe4e..fe1bf48d8 100644 --- a/novelwriter/core/docbuild.py +++ b/novelwriter/core/docbuild.py @@ -37,6 +37,7 @@ from novelwriter.core.project import NWProject from novelwriter.enum import nwBuildFmt from novelwriter.error import formatException, logException +from novelwriter.formats.epub import ToEPub from novelwriter.formats.todocx import ToDocX from novelwriter.formats.tohtml import ToHtml from novelwriter.formats.tokenizer import Tokenizer @@ -174,6 +175,13 @@ def iterBuildDocument(self, path: Path, bFormat: nwBuildFmt) -> Iterable[tuple[i yield from 
self._iterBuild(makeObj, filtered) makeObj.closeDocument() + elif bFormat == nwBuildFmt.EPUB: + makeObj = ToEPub(self._project) + filtered = self._setupBuild(makeObj) + makeObj.initDocument() + yield from self._iterBuild(makeObj, filtered) + makeObj.closeDocument() + elif bFormat == nwBuildFmt.PDF: makeObj = ToQTextDocument(self._project) makeObj.disableAnchors() diff --git a/novelwriter/enum.py b/novelwriter/enum.py index a837d824e..322cd3ba6 100644 --- a/novelwriter/enum.py +++ b/novelwriter/enum.py @@ -193,13 +193,14 @@ class nwBuildFmt(Enum): ODT = 0 FODT = 1 DOCX = 2 - PDF = 3 - HTML = 4 - STD_MD = 5 - EXT_MD = 6 - NWD = 7 - J_HTML = 8 - J_NWD = 9 + EPUB = 3 + PDF = 4 + HTML = 5 + STD_MD = 6 + EXT_MD = 7 + NWD = 8 + J_HTML = 9 + J_NWD = 10 class nwStatusShape(Enum): diff --git a/novelwriter/formats/epub.py b/novelwriter/formats/epub.py new file mode 100644 index 000000000..e6b3ba2ca --- /dev/null +++ b/novelwriter/formats/epub.py @@ -0,0 +1,440 @@ +""" +novelWriter – EPUB Converter +============================ + +File History: +Created: 2025-04-05 [2.7b1] ToEPub + +This file is a part of novelWriter +Copyright (C) 2025 Veronica Berglyd Olsen and novelWriter contributors + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+""" +from __future__ import annotations + +import logging +import xml.etree.ElementTree as ET + +from datetime import datetime, timezone +from enum import Enum +from pathlib import Path +from zipfile import ZIP_DEFLATED, ZipFile + +from novelwriter.common import xmlElement, xmlIndent, xmlSubElem +from novelwriter.core.project import NWProject +from novelwriter.formats.shared import BlockTyp, processHtmlEntities +from novelwriter.formats.tokenizer import Tokenizer + +logger = logging.getLogger(__name__) + +X_MIME = "application/epub+zip" + +# Main XML NameSpaces +XML_NS = { + "dc": "http://purl.org/dc/elements/1.1/", + "xml": "http://www.w3.org/XML/1998/namespace", +} +for ns, uri in XML_NS.items(): + ET.register_namespace(ns, uri) + + +def _mkTag(ns: str, tag: str) -> str: + """Assemble namespace and tag name.""" + if uri := XML_NS.get(ns, ""): + return f"{{{uri}}}{tag}" + logger.warning("Missing xml namespace '%s'", ns) + return tag + + +class EPubType(Enum): + + COVER = 0 + FRONTMATTER = 1 + PART = 2 + CHAPTER = 3 + BACKMATTER = 4 + + +class ToEPub(Tokenizer): + + def __init__(self, project: NWProject) -> None: + super().__init__(project) + self._section = EPubSection(EPubType.FRONTMATTER) + self._sections = [self._section] + self._isFront = True + return + + ## + # Class Methods + ## + + def doConvert(self) -> None: + """Convert the list of text tokens into HTML.""" + if not self._isNovel: + return + + for tType, _, tText, tFmt, _ in self._blocks: + + tText, tFmt = processHtmlEntities(tText, tFmt) + + # Process Text Type + if tType == BlockTyp.TEXT: + self._section.text.append(f"

{tText}

") + + elif tType == BlockTyp.TITLE and self._isFront: + tHead = tText.replace("\n", "
") + self._section.text.append(f"

{tHead}

") + + elif tType in (BlockTyp.TITLE, BlockTyp.PART, BlockTyp.HEAD1): + # A TITLE seen once _isFront is cleared, or a PART heading, opens a part section; + # HEAD1 opens a chapter. NOTE(review): assumes BlockTyp defines PART - confirm + eType = EPubType.CHAPTER if tType == BlockTyp.HEAD1 else EPubType.PART + self._section = EPubSection(eType) + self._sections.append(self._section) + + tHead = tText.replace("\n", "
") + self._section.setTitle(tHead, "H1") + self._isFront = False + + elif tType == BlockTyp.HEAD2: + tHead = tText.replace("\n", "
") + self._section.text.append(f"

{tHead}

") + + elif tType == BlockTyp.HEAD3: + tHead = tText.replace("\n", "
") + self._section.text.append(f"

{tHead}

") + + elif tType == BlockTyp.HEAD4: + tHead = tText.replace("\n", "
") + self._section.text.append(f"

{tHead}

") + + elif tType == BlockTyp.SEP: + self._section.text.append(f"

{tText}

") + + elif tType == BlockTyp.SKIP: + self._section.text.append("

 

") + + return + + def closeDocument(self) -> None: + """Run close document tasks.""" + # Generate section names and IDs, and prune empty ones + counts = dict.fromkeys(EPubType, 0) + sections = [] + for section in self._sections: + if section.hasContent(): + eType = section.epubType + counts[eType] += 1 + section.setSectionName(f"{eType.name.lower()}{counts[eType]}", len(sections) + 1) + sections.append(section) + self._sections = sections + return + + def saveDocument(self, path: Path) -> None: + """Save the data to a .epub file.""" + from zipfile import ZIP_STORED + + xContainer = self._containerXml() + xPackage = self._packageXml() + xToc = self._tocXml() + + def xmlToZip(name: str, root: ET.Element, zipObj: ZipFile) -> None: + xmlIndent(root) + zipObj.writestr(name, ET.tostring(root, encoding="utf-8", xml_declaration=True)) + + lang = self._dLocale.name() + with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=3) as outZip: + # The mimetype entry must be written first and stored uncompressed. Passing + # compress_type=None would fall back to the archive default (deflate). + outZip.writestr("mimetype", X_MIME, compress_type=ZIP_STORED) + xmlToZip("META-INF/container.xml", xContainer, outZip) + xmlToZip("OEBPS/package.opf", xPackage, outZip) + xmlToZip("OEBPS/toc.ncx", xToc, outZip) + outZip.writestr("OEBPS/nav.xhtml", self._generateNavPage()) + outZip.writestr("OEBPS/styles/stylesheet.css", self._generateStyleSheet()) + for section in self._sections: + outZip.writestr(f"OEBPS/xhtml/{section.name}.xhtml", section.sectionToXHtml(lang)) + + return + + ## + # Internal Functions + ## + + ## + # EPub Files + ## + + def _containerXml(self) -> ET.Element: + """Populate container.xml.""" + xRoot = xmlElement("container", attrib={ + "xmlns": "urn:oasis:names:tc:opendocument:xmlns:container", + "version": "1.0", + }) + xFiles = xmlSubElem(xRoot, "rootfiles") + xmlSubElem(xFiles, "rootfile", attrib={ + "full-path": "OEBPS/package.opf", + "media-type": "application/oebps-package+xml", + }) + return xRoot + + def _packageXml(self) -> ET.Element: + """Populate package.opf.""" + xRoot = xmlElement("package", attrib={ + "version":
"3.0", + _mkTag("xml", "lang"): self._dLocale.name(), + "xmlns": "http://www.idpf.org/2007/opf", + "unique-identifier": "uid", + }) + + # Meta Data + # dcterms:modified must use the UTC form CCYY-MM-DDThh:mm:ssZ + timeStamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + xMetaData = xmlSubElem(xRoot, "metadata") + xmlSubElem(xMetaData, _mkTag("dc", "title"), self._project.data.name) + xmlSubElem( + xMetaData, _mkTag("dc", "creator"), + self._project.data.author, attrib={"id": "creator"} + ) + xmlSubElem(xMetaData, _mkTag("dc", "publisher"), self._project.data.author) + xmlSubElem(xMetaData, _mkTag("dc", "language"), self._dLocale.name()) + xmlSubElem(xMetaData, _mkTag("dc", "date"), timeStamp[:10]) + xmlSubElem( + xMetaData, _mkTag("dc", "identifier"), + f"urn:uuid:{self._project.data.uuid}", attrib={"id": "uid"} + ) + # Each refines value must point at the id of an element declared above + xmlSubElem(xMetaData, "meta", "uuid", attrib={ + "refines": "#uid", + "property": "identifier-type", + "scheme": "xsd:string", + }) + xmlSubElem(xMetaData, "meta", "aut", attrib={ + "refines": "#creator", + "property": "role", + "scheme": "marc:relators", + }) + xmlSubElem(xMetaData, "meta", timeStamp[:10], attrib={ + "property": "dcterms:date", + }) + xmlSubElem(xMetaData, "meta", timeStamp, attrib={ + "property": "dcterms:modified", + }) + xmlSubElem(xMetaData, "meta", self._project.data.author, attrib={ + "property": "dcterms:creator", + }) + + xManifest = xmlSubElem(xRoot, "manifest") + xmlSubElem(xManifest, "item", attrib={ + "properties": "nav", + "id": "nav", + "href": "nav.xhtml", + "media-type": "application/xhtml+xml", + }) + # The spine's toc attribute refers to this item by id + xmlSubElem(xManifest, "item", attrib={ + "id": "toc", + "href": "toc.ncx", + "media-type": "application/x-dtbncx+xml", + }) + xmlSubElem(xManifest, "item", attrib={ + "id": "css", + "href": "styles/stylesheet.css", + "media-type": "text/css", + }) + for section in self._sections: + xmlSubElem(xManifest, "item", attrib={ + "id": section.sectionID, + "href": f"xhtml/{section.name}.xhtml", + "media-type": "application/xhtml+xml", + }) + + xSpine = xmlSubElem(xRoot, "spine", attrib={"toc": "toc"}) + for section in self._sections: + xmlSubElem(xSpine, "itemref", attrib={"idref": section.sectionID}) + + return xRoot + + def _tocXml(self) -> ET.Element: + """Populate toc.ncx.
+ See: https://en.wikipedia.org/wiki/EPUB#.ncx_file + """ + xRoot = xmlElement("ncx", attrib={ + "xmlns": "http://www.daisy.org/z3986/2005/ncx/", + "version": "2005-1", + _mkTag("xml", "lang"): self._dLocale.name(), + }) + + uid = f"urn:uuid:{self._project.data.uuid}" + xHead = xmlSubElem(xRoot, "head") + xmlSubElem(xHead, "meta", attrib={"name": "dtb:uid", "content": uid}) + xmlSubElem(xHead, "meta", attrib={"name": "dtb:depth", "content": "1"}) + xmlSubElem(xHead, "meta", attrib={"name": "dtb:totalPageCount", "content": "0"}) + xmlSubElem(xHead, "meta", attrib={"name": "dtb:maxPageNumber", "content": "0"}) + + xTitle = xmlSubElem(xRoot, "docTitle") + xmlSubElem(xTitle, "text", self._project.data.name) + # The author goes in docAuthor; docTitle is reserved for the title above + xAuthor = xmlSubElem(xRoot, "docAuthor") + xmlSubElem(xAuthor, "text", self._project.data.author) + + xNavMap = xmlSubElem(xRoot, "navMap") + playOrder = 0 + for section in self._sections: + if (title := section.title) and section.epubType != EPubType.COVER: + playOrder += 1 + xNavPoint = xmlSubElem(xNavMap, "navPoint", attrib={ + "class": "chapter", + "id": section.sectionID, + "playOrder": str(playOrder), + }) + xLabel = xmlSubElem(xNavPoint, "navLabel") + xmlSubElem(xLabel, "text", title) + xmlSubElem(xNavPoint, "content", attrib={"src": f"xhtml/{section.name}.xhtml"}) + + return xRoot + + def _generateNavPage(self) -> str: + """Generate the content XHtml page.""" + langCode = self._dLocale.name() + title = self._localLookup("Contents") + + xHtml = [''] + xHtml.append("") + xHtml.append( + '' + ) + xHtml.append("") + xHtml.append(f"{title}") + xHtml.append('') + xHtml.append('') + xHtml.append("") + xHtml.append("") + xHtml.append("
") + xHtml.append(f'

{title}

') + xHtml.append("
") + xHtml.append('") + xHtml.append("") + xHtml.append("") + return "\n".join(xHtml) + + def _generateStyleSheet(self) -> str: + """Generate the book style sheet.""" + styles = "H1 {}" + return styles + + +class EPubSection: + """A section of a book. + + This can be a chapter, partition, front matter, or back matter + documents. New sections are generated each time a H1 header is + encountered. + + See: https://www.w3.org/TR/epub-ssv/#sec-partitions + """ + __slots__ = ("_type", "_name", "_id", "_title", "_class", "_text") + + BODY_TYPE = { + EPubType.COVER: "cover", + EPubType.FRONTMATTER: "frontmatter", + EPubType.PART: "bodymatter", + EPubType.CHAPTER: "bodymatter", + EPubType.BACKMATTER: "backmatter", + } + SECTION_TYPE = { + EPubType.COVER: ("cover", ""), + EPubType.FRONTMATTER: ("frontmatter", ""), + EPubType.PART: ("part", " doc-part"), + EPubType.CHAPTER: ("chapter", "doc-chapter"), + EPubType.BACKMATTER: ("backmatter", ""), + } + + def __init__(self, eType: EPubType) -> None: + self._type = eType + self._name = "" + self._id = "" + self._title = "" + self._class = "" + self._text: list[str] = [] + return + + @property + def title(self) -> str: + """Return the section title.""" + return self._title + + @property + def text(self) -> list[str]: + """Return the text buffer.""" + return self._text + + @property + def epubType(self) -> EPubType: + """Return the epub:type of the section.""" + return self._type + + @property + def name(self) -> str: + """Return the name of the section.""" + return self._name + + @property + def sectionID(self) -> str: + """Return the section ID of the section.""" + return self._id + + ## + # Setters + ## + + def setSectionName(self, name: str, sid: int) -> None: + """Set the section name and number.""" + self._name = name + self._id = f"sec_{sid}" + return + + def setTitle(self, title: str, cssClass: str) -> None: + """Set the title. This is useful if the title isn't available + when the section is created. 
+ """ + self._title = title + self._class = cssClass + return + + ## + # Methods + ## + + def hasContent(self) -> bool: + """Returns True if there is text.""" + return self._title != "" or len(self._text) > 0 + + def sectionToXHtml(self, langCode: str) -> str: + """Pack all content into an XHtml string.""" + eType, eRole = self.SECTION_TYPE.get(self._type, (None, None)) + sType = f' epub:type="{eType}"' if eType else "" + sRole = f' role="{eRole}"' if eRole else "" + hClass = f' class="{self._class}"' if self._class else "" + xHtml = [''] + xHtml.append("") + xHtml.append( + '' + ) + xHtml.append("") + if self._title: + xHtml.append(f"{self._title}") + xHtml.append('') + xHtml.append('') + xHtml.append("") + xHtml.append(f'') + xHtml.append(f'') + if self._title: + xHtml.append("
") + xHtml.append(f'{self._title}') + xHtml.append("
") + xHtml.extend(self._text) + xHtml.append("") + xHtml.append("") + xHtml.append("") + return "\n".join(xHtml) diff --git a/novelwriter/formats/shared.py b/novelwriter/formats/shared.py index 8a2ab5e61..64ff55c66 100644 --- a/novelwriter/formats/shared.py +++ b/novelwriter/formats/shared.py @@ -154,3 +154,31 @@ class BlockFmt(Flag): # A tokenized text block, consisting of: # type, header number, text, text formats, and block format T_Block = tuple[BlockTyp, str, str, T_Formats, BlockFmt] + + +# Formatters +# ========== + +def processHtmlEntities(text: str, fmt: T_Formats) -> tuple[str, T_Formats]: + """Replace < and > with HTML entities.""" + if fmt: + # If we have formatting, we must recompute the locations + cText = [] + i = 0 + for c in text: + if c == "<": + cText.append("<") + fmt = [(p + 3 if p > i else p, f, k) for p, f, k in fmt] + i += 4 + elif c == ">": + cText.append(">") + fmt = [(p + 3 if p > i else p, f, k) for p, f, k in fmt] + i += 4 + else: + cText.append(c) + i += 1 + text = "".join(cText) + else: + # If we don't have formatting, we can do a plain replace + text = text.replace("<", "<").replace(">", ">") + return text, fmt diff --git a/novelwriter/formats/tohtml.py b/novelwriter/formats/tohtml.py index fb2c1ee51..20a3bc489 100644 --- a/novelwriter/formats/tohtml.py +++ b/novelwriter/formats/tohtml.py @@ -32,7 +32,9 @@ from novelwriter.common import formatTimeStamp from novelwriter.constants import nwHtmlUnicode, nwStyles from novelwriter.core.project import NWProject -from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt, stripEscape +from novelwriter.formats.shared import ( + BlockFmt, BlockTyp, T_Formats, TextFmt, processHtmlEntities, stripEscape +) from novelwriter.formats.tokenizer import Tokenizer from novelwriter.types import FONT_STYLE, FONT_WEIGHTS, QtHexRgb @@ -133,27 +135,7 @@ def doConvert(self) -> None: lines = [] for tType, tMeta, tText, tFmt, tStyle in self._blocks: - # Replace < and > with HTML entities - 
if tFmt: - # If we have formatting, we must recompute the locations - cText = [] - i = 0 - for c in tText: - if c == "<": - cText.append("<") - tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt] - i += 4 - elif c == ">": - cText.append(">") - tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt] - i += 4 - else: - cText.append(c) - i += 1 - tText = "".join(cText) - else: - # If we don't have formatting, we can do a plain replace - tText = tText.replace("<", "<").replace(">", ">") + tText, tFmt = processHtmlEntities(tText, tFmt) # Styles aStyle = [] diff --git a/novelwriter/formats/tokenizer.py b/novelwriter/formats/tokenizer.py index f44139a31..fd5b481d7 100644 --- a/novelwriter/formats/tokenizer.py +++ b/novelwriter/formats/tokenizer.py @@ -458,6 +458,7 @@ def addRootHeading(self, tHandle: str) -> None: if (item := self._project.tree[tHandle]) and item.isRootType(): self._handle = tHandle + self._isNovel = item.documentAllowed() style = BlockFmt.CENTRE if self._isFirst: self._isFirst = False