Skip to content

Commit b932f04

Browse files
Merge pull request #45 from fleetingbytes/develop
close #42, #44
2 parents ea0d8a9 + 24ec266 commit b932f04

File tree

11 files changed

+154
-181
lines changed

11 files changed

+154
-181
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
<!-- towncrier release notes start -->
44

5+
## 0.9.4 (2024-11-10)
6+
7+
8+
### Bugfixes
9+
10+
- add missing import statement in `html_decapsulator.py` [#42](https://github.com/fleetingbytes/rtfparse/issues/42)
11+
12+
13+
### Development Details
14+
15+
- replace `black` and `isort` with `ruff` [#44](https://github.com/fleetingbytes/rtfparse/issues/44)
16+
517
## 0.9.3 (2024-11-01)
618

719

pyproject.toml

Lines changed: 83 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,13 @@ no-cov = "cov --no-cov {args}"
6767

6868
[tool.hatch.envs.style]
6969
dependencies = [
70-
"black",
71-
"isort",
70+
"ruff",
7271
]
7372

7473
[tool.hatch.envs.style.scripts]
7574
fmt = [
76-
"isort .",
77-
"black .",
75+
"ruff format",
76+
"ruff check",
7877
]
7978

8079
[tool.hatch.envs.tc]
@@ -108,11 +107,87 @@ exclude_lines = [
108107
"if TYPE_CHECKING:",
109108
]
110109

111-
[tool.black]
112-
line-length = 102
110+
[tool.ruff]
111+
# Exclude a variety of commonly ignored directories.
112+
exclude = [
113+
".bzr",
114+
".direnv",
115+
".eggs",
116+
".git",
117+
".git-rewrite",
118+
".hg",
119+
".ipynb_checkpoints",
120+
".mypy_cache",
121+
".nox",
122+
".pants.d",
123+
".pyenv",
124+
".pytest_cache",
125+
".pytype",
126+
".ruff_cache",
127+
".svn",
128+
".tox",
129+
".venv",
130+
".vscode",
131+
"__pypackages__",
132+
"_build",
133+
"buck-out",
134+
"build",
135+
"dist",
136+
"node_modules",
137+
"site-packages",
138+
"venv",
139+
]
140+
141+
# Longer than Black's default of 88 characters.
142+
line-length = 150
143+
indent-width = 4
144+
145+
# Assume Python 3.10
146+
target-version = "py310"
147+
148+
[tool.ruff.lint]
149+
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
150+
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
151+
# McCabe complexity (`C901`) by default.
152+
select = ["E4", "E7", "E9", "F"]
153+
ignore = []
154+
155+
# Allow fix for all enabled rules (when `--fix` is provided).
156+
fixable = ["ALL"]
157+
unfixable = []
158+
159+
# Allow unused variables when underscore-prefixed.
160+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
161+
162+
[tool.ruff.lint.pycodestyle]
163+
max-line-length = 150
164+
165+
[tool.ruff.format]
166+
# Like Black, use double quotes for strings.
167+
quote-style = "double"
168+
169+
# Like Black, indent with spaces, rather than tabs.
170+
indent-style = "space"
171+
172+
# Like Black, respect magic trailing commas.
173+
skip-magic-trailing-comma = false
174+
175+
# Like Black, automatically detect the appropriate line ending.
176+
line-ending = "auto"
177+
178+
# Enable auto-formatting of code examples in docstrings. Markdown,
179+
# reStructuredText code/literal blocks and doctests are all supported.
180+
#
181+
# This is currently disabled by default, but it is planned for this
182+
# to be opt-out in the future.
183+
docstring-code-format = true
113184

114-
[tool.isort]
115-
line_length = 102
185+
# Set the line length limit used when formatting code snippets in
186+
# docstrings.
187+
#
188+
# This only has an effect when the `docstring-code-format` setting is
189+
# enabled.
190+
docstring-code-line-length = "dynamic"
116191

117192
[tool.towncrier]
118193
name = "rtfparse"

src/rtfparse/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#!/usr/bin/env python
22

33

4-
__version__ = "0.9.3"
4+
__version__ = "0.9.4"

src/rtfparse/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33

44
# Towncrier needs version
5-
from rtfparse.__about__ import __version__
5+
# from rtfparse.__about__ import __version__
6+
__all__ = ["rtfparse.__about__.__version__"]
67

78
if __name__ == "__main__":
89
from rtfparse.cli import main

src/rtfparse/cli.py

Lines changed: 15 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,10 @@ def setup_logger(directory: Path) -> logging.Logger:
2424
"""
2525
try:
2626
provide_dir(directory)
27-
logger_config = logging_conf.create_dict_config(
28-
directory, "rtfparse.debug.log", "rtfparse.info.log", "rtfparse.errors.log"
29-
)
27+
logger_config = logging_conf.create_dict_config(directory, "rtfparse.debug.log", "rtfparse.info.log", "rtfparse.errors.log")
3028
except FileExistsError:
31-
logger.error(
32-
f"Failed to create the directory `{str(directory)}` because it already exists as a file."
33-
)
34-
logger.error(f"Please create the directory `{str(directory)}`")
29+
print(f"Failed to create the directory `{str(directory)}` because it already exists as a file.")
30+
print(f"Please create the directory `{str(directory)}`")
3531
finally:
3632
logging.config.dictConfig(logger_config)
3733
logger = logging.getLogger(__name__)
@@ -46,49 +42,22 @@ def argument_parser() -> ArgumentParser:
4642
Creates an argument parser for command line arguments
4743
"""
4844
parser = ArgumentParser(description="RTF parser", prog="rtfparse")
49-
parser.add_argument(
50-
"-v",
51-
"--version",
52-
action="version",
53-
version=" ".join(("%(prog)s", __version__)),
54-
help="print out rtfparse version and exit",
55-
)
56-
parser.add_argument(
57-
"-r", "--rtf-file", action="store", metavar="PATH", type=Path, help="path to the rtf file"
58-
)
59-
parser.add_argument(
60-
"-m",
61-
"--msg-file",
62-
action="store",
63-
metavar="PATH",
64-
type=Path,
65-
help="Parse RTF from MS Outlook's .msg file",
66-
)
67-
parser.add_argument(
68-
"-d", "--decapsulate-html", action="store_true", help="Decapsulate HTML from RTF"
69-
)
70-
parser.add_argument(
71-
"-i", "--embed-img", action="store_true", help="Embed images from email to HTML"
72-
)
73-
parser.add_argument(
74-
"-o", "--output-file", metavar="PATH", type=Path, help="path to the desired output file"
75-
)
76-
parser.add_argument(
77-
"-a",
78-
"--attachments-dir",
79-
metavar="PATH",
80-
type=Path,
81-
help="path to directory where to save email attachments",
82-
)
45+
parser.add_argument("-v", "--version", action="version", version=" ".join(("%(prog)s", __version__)), help="print out rtfparse version and exit")
46+
parser.add_argument("-r", "--rtf-file", action="store", metavar="PATH", type=Path, help="path to the rtf file")
47+
parser.add_argument("-m", "--msg-file", action="store", metavar="PATH", type=Path, help="Parse RTF from MS Outlook's .msg file")
48+
parser.add_argument("-d", "--decapsulate-html", action="store_true", help="Decapsulate HTML from RTF")
49+
parser.add_argument("-i", "--embed-img", action="store_true", help="Embed images from email to HTML")
50+
parser.add_argument("-o", "--output-file", metavar="PATH", type=Path, help="path to the desired output file")
51+
parser.add_argument("-a", "--attachments-dir", metavar="PATH", type=Path, help="path to directory where to save email attachments")
8352
return parser
8453

8554

8655
def decapsulate(rp: Rtf_Parser, target_file: Path) -> None:
8756
renderer = HTML_Decapsulator()
8857
with open(target_file, mode="w", encoding="utf-8") as htmlfile:
89-
logger.info(f"Rendering the encapsulated HTML")
58+
logger.info("Rendering the encapsulated HTML")
9059
renderer.render(rp.parsed, htmlfile)
91-
logger.info(f"Encapsulated HTML rendered")
60+
logger.info("Encapsulated HTML rendered")
9261

9362

9463
def run(cli_args: Namespace) -> None:
@@ -101,9 +70,7 @@ def run(cli_args: Namespace) -> None:
10170
if cli_args.attachments_dir:
10271
provide_dir(cli_args.attachments_dir)
10372
for attachment in msg.attachments:
104-
with open(
105-
cli_args.attachments_dir / f"{attachment.longFilename}", mode="wb"
106-
) as att_file:
73+
with open(cli_args.attachments_dir / f"{attachment.longFilename}", mode="wb") as att_file:
10774
att_file.write(attachment.data)
10875
decompressed_rtf = cr.decompress(msg.compressedRtf)
10976
with open(cli_args.msg_file.with_suffix(".rtf"), mode="wb") as email_rtf:
@@ -119,7 +86,7 @@ def main() -> None:
11986
"""
12087
Entry point for any component started from the command line
12188
"""
122-
logger.debug(f"rtfparse started")
89+
logger.debug("rtfparse started")
12390
parser = argument_parser()
12491
argcomplete.autocomplete(parser)
12592
cli_args = parser.parse_args()
@@ -128,4 +95,4 @@ def main() -> None:
12895
run(cli_args)
12996
except Exception as err:
13097
logger.exception(f"Uncaught exception {repr(err)} occurred.")
131-
logger.debug(f"rtfparse ended")
98+
logger.debug("rtfparse ended")

src/rtfparse/entities.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import io
55
import logging
6-
import re
76

87
# Own modules
98
from rtfparse import re_patterns, utils
@@ -19,9 +18,7 @@
1918
GROUP_START = BACKSLASH + IGNORABLE
2019
MAX_CW_LETTERS = 32 # As specified in RTF Spec
2120
INTEGER_MAGNITUDE = 32 # As specified in RTF Spec
22-
PLAIN_TEXT = CONTROL_WORD = (
23-
BACKSLASH + MAX_CW_LETTERS + MINUS + len(str((1 << INTEGER_MAGNITUDE) // 2)) + DELIMITER
24-
)
21+
PLAIN_TEXT = CONTROL_WORD = BACKSLASH + MAX_CW_LETTERS + MINUS + len(str((1 << INTEGER_MAGNITUDE) // 2)) + DELIMITER
2522

2623

2724
class Entity:
@@ -37,23 +34,23 @@ def probe(cls, pattern: re_patterns.Bytes_Regex, file: io.BufferedReader) -> Byt
3734
logger.debug(f"{probed = }")
3835
file.seek(original_position)
3936
logger.debug(f"Probe returned to position {file.tell()}")
40-
if match := re_patterns.group_start.match(probed):
37+
if re_patterns.group_start.match(probed):
4138
result = Bytestring_Type.GROUP_START
42-
elif match := re_patterns.group_end.match(probed):
39+
elif re_patterns.group_end.match(probed):
4340
result = Bytestring_Type.GROUP_END
44-
elif match := re_patterns.control_word.match(probed):
41+
elif re_patterns.control_word.match(probed):
4542
result = Bytestring_Type.CONTROL_WORD
46-
elif match := re_patterns.control_symbol.match(probed):
43+
elif re_patterns.control_symbol.match(probed):
4744
result = Bytestring_Type.CONTROL_SYMBOL
48-
elif match := re_patterns.plain_text.match(probed):
45+
elif re_patterns.plain_text.match(probed):
4946
result = Bytestring_Type.PLAIN_TEXT
5047
else:
51-
logger.debug(f"This does not match anything, it's probably a newline, moving on")
48+
logger.debug("This does not match anything, it's probably a newline, moving on")
5249
original_position += 1
5350
file.seek(original_position)
5451
logger.debug(f"Probe moved to position {file.tell()}")
5552
if not probed:
56-
logger.debug(f"Reached unexpected end of file.")
53+
logger.debug("Reached unexpected end of file.")
5754
result = Bytestring_Type.GROUP_END
5855
break
5956
continue
@@ -85,16 +82,14 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
8582
logger.debug(f"Final {self.control_name = }")
8683
target_position = self.start_position + match.span()[1]
8784
if match.group("other"):
88-
logger.debug(
89-
f"Delimiter is {match.group('other').decode(self.encoding)}, len: {len(match.group('delimiter'))}"
90-
)
85+
logger.debug(f"Delimiter is {match.group('other').decode(self.encoding)}, len: {len(match.group('delimiter'))}")
9186
target_position -= len(match.group("delimiter"))
9287
file.seek(target_position)
9388
# handle \binN:
9489
if self.control_name == "bin":
9590
self.bindata = file.read(utils.twos_complement(self.parameter, INTEGER_MAGNITUDE))
9691
else:
97-
logger.warning(f"Missing Control Word")
92+
logger.warning("Missing Control Word")
9893
file.seek(self.start_position)
9994

10095
def __repr__(self) -> str:
@@ -112,9 +107,7 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
112107
if self.text == "'":
113108
self.char = file.read(SYMBOL).decode(self.encoding)
114109
self.text = bytes((int(self.char, base=16),)).decode(self.encoding)
115-
logger.debug(
116-
f"Encountered escaped ANSI character, read two more bytes: {self.char}, character: {self.text}"
117-
)
110+
logger.debug(f"Encountered escaped ANSI character, read two more bytes: {self.char}, character: {self.text}")
118111
if self.text in "\\{}":
119112
file.seek(file.tell() - SYMBOL)
120113

@@ -127,16 +120,14 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
127120
super().__init__()
128121
self.encoding = encoding
129122
self.text = ""
130-
logger.debug(f"Constructing Plain_Text")
123+
logger.debug("Constructing Plain_Text")
131124
while True:
132125
self.start_position = file.tell()
133126
read = file.read(PLAIN_TEXT)
134-
logger.debug(
135-
f"Read file from {self.start_position} to position {file.tell()}, read: {read}"
136-
)
127+
logger.debug(f"Read file from {self.start_position} to position {file.tell()}, read: {read}")
137128
# see if we have read all the plain text there is:
138129
if match := re_patterns.plain_text.match(read):
139-
logger.debug(f"This matches the plain text pattern")
130+
logger.debug("This matches the plain text pattern")
140131
_text = match.group("text").decode(self.encoding)
141132
logger.debug(f"{_text = }")
142133
self.text = "".join((self.text, _text))
@@ -158,7 +149,7 @@ def __repr__(self) -> str:
158149
class Group(Entity):
159150
def __init__(self, encoding: str, file: io.BufferedReader) -> None:
160151
super().__init__()
161-
logger.debug(f"Group.__init__")
152+
logger.debug("Group.__init__")
162153
self.encoding = encoding
163154
self.known = False
164155
self.name = "unknown"
@@ -177,9 +168,7 @@ def __init__(self, encoding: str, file: io.BufferedReader) -> None:
177168
file.seek(self.start_position + GROUP_START - IGNORABLE)
178169
logger.debug(f"Returned to position {file.tell()}")
179170
else:
180-
logger.warning(
181-
utils.warn(f"Expected a group but found no group start. Creating unknown group")
182-
)
171+
logger.warning(utils.warn("Expected a group but found no group start. Creating unknown group"))
183172
file.seek(self.start_position)
184173
while True:
185174
probed = self.probe(re_patterns.probe, file)

0 commit comments

Comments
 (0)