Skip to content

Commit

Permalink
Merge pull request #45 from fleetingbytes/develop
Browse files Browse the repository at this point in the history
close #42, #44
  • Loading branch information
fleetingbytes authored Nov 10, 2024
2 parents ea0d8a9 + 24ec266 commit b932f04
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 181 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@

<!-- towncrier release notes start -->

## 0.9.4 (2024-11-10)


### Bugfixes

- add missing import statement in `html_decapsulator.py` [#42](https://github.com/fleetingbytes/rtfparse/issues/42)


### Development Details

- replace `black` and `isort` with `ruff` [#44](https://github.com/fleetingbytes/rtfparse/issues/44)

## 0.9.3 (2024-11-01)


Expand Down
91 changes: 83 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,13 @@ no-cov = "cov --no-cov {args}"

[tool.hatch.envs.style]
dependencies = [
"black",
"isort",
"ruff",
]

[tool.hatch.envs.style.scripts]
fmt = [
"isort .",
"black .",
"ruff format",
"ruff check",
]

[tool.hatch.envs.tc]
Expand Down Expand Up @@ -108,11 +107,87 @@ exclude_lines = [
"if TYPE_CHECKING:",
]

[tool.black]
line-length = 102
[tool.ruff]
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".ipynb_checkpoints",
".mypy_cache",
".nox",
".pants.d",
".pyenv",
".pytest_cache",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
".vscode",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"site-packages",
"venv",
]

# Wider than Black's default (88).
line-length = 150
indent-width = 4

# Assume Python 3.10
target-version = "py310"

[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
ignore = []

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.pycodestyle]
max-line-length = 150

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

# Enable auto-formatting of code examples in docstrings. Markdown,
# reStructuredText code/literal blocks and doctests are all supported.
#
# This is currently disabled by default, but it is planned for this
# to be opt-out in the future.
docstring-code-format = true

[tool.isort]
line_length = 102
# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
docstring-code-line-length = "dynamic"

[tool.towncrier]
name = "rtfparse"
Expand Down
2 changes: 1 addition & 1 deletion src/rtfparse/__about__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python


__version__ = "0.9.3"
__version__ = "0.9.4"
3 changes: 2 additions & 1 deletion src/rtfparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@


# Towncrier needs version
from rtfparse.__about__ import __version__
# from rtfparse.__about__ import __version__
__all__ = ["rtfparse.__about__.__version__"]

if __name__ == "__main__":
from rtfparse.cli import main
Expand Down
63 changes: 15 additions & 48 deletions src/rtfparse/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,10 @@ def setup_logger(directory: Path) -> logging.Logger:
"""
try:
provide_dir(directory)
logger_config = logging_conf.create_dict_config(
directory, "rtfparse.debug.log", "rtfparse.info.log", "rtfparse.errors.log"
)
logger_config = logging_conf.create_dict_config(directory, "rtfparse.debug.log", "rtfparse.info.log", "rtfparse.errors.log")
except FileExistsError:
logger.error(
f"Failed to create the directory `{str(directory)}` because it already exists as a file."
)
logger.error(f"Please create the directory `{str(directory)}`")
print(f"Failed to create the directory `{str(directory)}` because it already exists as a file.")
print(f"Please create the directory `{str(directory)}`")
finally:
logging.config.dictConfig(logger_config)
logger = logging.getLogger(__name__)
Expand All @@ -46,49 +42,22 @@ def argument_parser() -> ArgumentParser:
Creates an argument parser for command line arguments
"""
parser = ArgumentParser(description="RTF parser", prog="rtfparse")
parser.add_argument(
"-v",
"--version",
action="version",
version=" ".join(("%(prog)s", __version__)),
help="print out rtfparse version and exit",
)
parser.add_argument(
"-r", "--rtf-file", action="store", metavar="PATH", type=Path, help="path to the rtf file"
)
parser.add_argument(
"-m",
"--msg-file",
action="store",
metavar="PATH",
type=Path,
help="Parse RTF from MS Outlook's .msg file",
)
parser.add_argument(
"-d", "--decapsulate-html", action="store_true", help="Decapsulate HTML from RTF"
)
parser.add_argument(
"-i", "--embed-img", action="store_true", help="Embed images from email to HTML"
)
parser.add_argument(
"-o", "--output-file", metavar="PATH", type=Path, help="path to the desired output file"
)
parser.add_argument(
"-a",
"--attachments-dir",
metavar="PATH",
type=Path,
help="path to directory where to save email attachments",
)
parser.add_argument("-v", "--version", action="version", version=" ".join(("%(prog)s", __version__)), help="print out rtfparse version and exit")
parser.add_argument("-r", "--rtf-file", action="store", metavar="PATH", type=Path, help="path to the rtf file")
parser.add_argument("-m", "--msg-file", action="store", metavar="PATH", type=Path, help="Parse RTF from MS Outlook's .msg file")
parser.add_argument("-d", "--decapsulate-html", action="store_true", help="Decapsulate HTML from RTF")
parser.add_argument("-i", "--embed-img", action="store_true", help="Embed images from email to HTML")
parser.add_argument("-o", "--output-file", metavar="PATH", type=Path, help="path to the desired output file")
parser.add_argument("-a", "--attachments-dir", metavar="PATH", type=Path, help="path to directory where to save email attachments")
return parser


def decapsulate(rp: Rtf_Parser, target_file: Path) -> None:
renderer = HTML_Decapsulator()
with open(target_file, mode="w", encoding="utf-8") as htmlfile:
logger.info(f"Rendering the encapsulated HTML")
logger.info("Rendering the encapsulated HTML")
renderer.render(rp.parsed, htmlfile)
logger.info(f"Encapsulated HTML rendered")
logger.info("Encapsulated HTML rendered")


def run(cli_args: Namespace) -> None:
Expand All @@ -101,9 +70,7 @@ def run(cli_args: Namespace) -> None:
if cli_args.attachments_dir:
provide_dir(cli_args.attachments_dir)
for attachment in msg.attachments:
with open(
cli_args.attachments_dir / f"{attachment.longFilename}", mode="wb"
) as att_file:
with open(cli_args.attachments_dir / f"{attachment.longFilename}", mode="wb") as att_file:
att_file.write(attachment.data)
decompressed_rtf = cr.decompress(msg.compressedRtf)
with open(cli_args.msg_file.with_suffix(".rtf"), mode="wb") as email_rtf:
Expand All @@ -119,7 +86,7 @@ def main() -> None:
"""
Entry point for any component started from the command line
"""
logger.debug(f"rtfparse started")
logger.debug("rtfparse started")
parser = argument_parser()
argcomplete.autocomplete(parser)
cli_args = parser.parse_args()
Expand All @@ -128,4 +95,4 @@ def main() -> None:
run(cli_args)
except Exception as err:
logger.exception(f"Uncaught exception {repr(err)} occurred.")
logger.debug(f"rtfparse ended")
logger.debug("rtfparse ended")
43 changes: 16 additions & 27 deletions src/rtfparse/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import io
import logging
import re

# Own modules
from rtfparse import re_patterns, utils
Expand All @@ -19,9 +18,7 @@
GROUP_START = BACKSLASH + IGNORABLE
MAX_CW_LETTERS = 32 # As specified in RTF Spec
INTEGER_MAGNITUDE = 32 # As specified in RTF Spec
PLAIN_TEXT = CONTROL_WORD = (
BACKSLASH + MAX_CW_LETTERS + MINUS + len(str((1 << INTEGER_MAGNITUDE) // 2)) + DELIMITER
)
PLAIN_TEXT = CONTROL_WORD = BACKSLASH + MAX_CW_LETTERS + MINUS + len(str((1 << INTEGER_MAGNITUDE) // 2)) + DELIMITER


class Entity:
Expand All @@ -37,23 +34,23 @@ def probe(cls, pattern: re_patterns.Bytes_Regex, file: io.BufferedReader) -> Byt
logger.debug(f"{probed = }")
file.seek(original_position)
logger.debug(f"Probe returned to position {file.tell()}")
if match := re_patterns.group_start.match(probed):
if re_patterns.group_start.match(probed):
result = Bytestring_Type.GROUP_START
elif match := re_patterns.group_end.match(probed):
elif re_patterns.group_end.match(probed):
result = Bytestring_Type.GROUP_END
elif match := re_patterns.control_word.match(probed):
elif re_patterns.control_word.match(probed):
result = Bytestring_Type.CONTROL_WORD
elif match := re_patterns.control_symbol.match(probed):
elif re_patterns.control_symbol.match(probed):
result = Bytestring_Type.CONTROL_SYMBOL
elif match := re_patterns.plain_text.match(probed):
elif re_patterns.plain_text.match(probed):
result = Bytestring_Type.PLAIN_TEXT
else:
logger.debug(f"This does not match anything, it's probably a newline, moving on")
logger.debug("This does not match anything, it's probably a newline, moving on")
original_position += 1
file.seek(original_position)
logger.debug(f"Probe moved to position {file.tell()}")
if not probed:
logger.debug(f"Reached unexpected end of file.")
logger.debug("Reached unexpected end of file.")
result = Bytestring_Type.GROUP_END
break
continue
Expand Down Expand Up @@ -85,16 +82,14 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
logger.debug(f"Final {self.control_name = }")
target_position = self.start_position + match.span()[1]
if match.group("other"):
logger.debug(
f"Delimiter is {match.group('other').decode(self.encoding)}, len: {len(match.group('delimiter'))}"
)
logger.debug(f"Delimiter is {match.group('other').decode(self.encoding)}, len: {len(match.group('delimiter'))}")
target_position -= len(match.group("delimiter"))
file.seek(target_position)
# handle \binN:
if self.control_name == "bin":
self.bindata = file.read(utils.twos_complement(self.parameter, INTEGER_MAGNITUDE))
else:
logger.warning(f"Missing Control Word")
logger.warning("Missing Control Word")
file.seek(self.start_position)

def __repr__(self) -> str:
Expand All @@ -112,9 +107,7 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
if self.text == "'":
self.char = file.read(SYMBOL).decode(self.encoding)
self.text = bytes((int(self.char, base=16),)).decode(self.encoding)
logger.debug(
f"Encountered escaped ANSI character, read two more bytes: {self.char}, character: {self.text}"
)
logger.debug(f"Encountered escaped ANSI character, read two more bytes: {self.char}, character: {self.text}")
if self.text in "\\{}":
file.seek(file.tell() - SYMBOL)

Expand All @@ -127,16 +120,14 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
super().__init__()
self.encoding = encoding
self.text = ""
logger.debug(f"Constructing Plain_Text")
logger.debug("Constructing Plain_Text")
while True:
self.start_position = file.tell()
read = file.read(PLAIN_TEXT)
logger.debug(
f"Read file from {self.start_position} to position {file.tell()}, read: {read}"
)
logger.debug(f"Read file from {self.start_position} to position {file.tell()}, read: {read}")
# see if we have read all the plain text there is:
if match := re_patterns.plain_text.match(read):
logger.debug(f"This matches the plain text pattern")
logger.debug("This matches the plain text pattern")
_text = match.group("text").decode(self.encoding)
logger.debug(f"{_text = }")
self.text = "".join((self.text, _text))
Expand All @@ -158,7 +149,7 @@ def __repr__(self) -> str:
class Group(Entity):
def __init__(self, encoding: str, file: io.BufferedReader) -> None:
super().__init__()
logger.debug(f"Group.__init__")
logger.debug("Group.__init__")
self.encoding = encoding
self.known = False
self.name = "unknown"
Expand All @@ -177,9 +168,7 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
file.seek(self.start_position + GROUP_START - IGNORABLE)
logger.debug(f"Returned to position {file.tell()}")
else:
logger.warning(
utils.warn(f"Expected a group but found no group start. Creating unknown group")
)
logger.warning(utils.warn("Expected a group but found no group start. Creating unknown group"))
file.seek(self.start_position)
while True:
probed = self.probe(re_patterns.probe, file)
Expand Down
Loading

0 comments on commit b932f04

Please sign in to comment.