diff --git a/.python-version b/.python-version index 424e179..c70edfa 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.6.8 +3.11.13 diff --git a/.travis.yml b/.travis.yml index c2d6da6..b18481e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,9 @@ language: python python: - - "3.6.5" + - "3.11.13" # command to install dependencies -install: "pip install -r requirements-dev.txt" +install: "make dev" # command to run tests script: - - flake8 email_parser tests - - nosetests --with-coverage --cover-inclusive --cover-erase --cover-package=email_parser --cover-min-percentage=70 + - make test + - make coverage diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..010636c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,3 @@ +# ExecPlans + +When writing complex features or significant refactors, use an ExecPlan (as described in PLANS.md) from design to implementation. diff --git a/Makefile b/Makefile index d647a49..42765b6 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,16 @@ # Some simple testing tasks (sorry, UNIX only). PYTHON=venv/bin/python3 +PYTHON_BIN?=python3.11 PIP=venv/bin/pip EI=venv/bin/easy_install -NOSE=venv/bin/nosetests +TEST_RUNNER=venv/bin/nosetests FLAKE=venv/bin/flake8 EMAILS_TEMPLATES_URI=git@github.com:KeepSafe/emails.git EMAILS_PATH=emails GUI_BIN=ks-email-parser -FLAGS=--with-coverage --cover-inclusive --cover-erase --cover-package=email_parser --cover-min-percentage=70 +TEST_RUNNER_FLAGS=-s +COVERAGE=venv/bin/coverage PYPICLOUD_HOST=pypicloud.getkeepsafe.local TWINE=./venv/bin/twine @@ -18,7 +20,7 @@ update: $(PIP) install -U . env: - test -d venv || python3 -m venv venv + test -d venv || $(PYTHON_BIN) -m venv venv dev: env update $(PIP) install .[tests,devtools] @@ -39,17 +41,16 @@ flake: $(FLAKE) email_parser tests test: flake - $(NOSE) -s $(FLAGS) + $(COVERAGE) run $(TEST_RUNNER) $(TEST_RUNNER_FLAGS) vtest: - $(NOSE) -s -v $(FLAGS) + $(COVERAGE) run $(TEST_RUNNER) -v $(TEST_RUNNER_FLAGS) testloop: - while sleep 1; do $(NOSE) -s $(FLAGS); done + while sleep 1; do $(TEST_RUNNER) $(TEST_RUNNER_FLAGS); done cov cover coverage: - $(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS) - echo "open file://`pwd`/coverage/index.html" + $(COVERAGE) report -m clean: rm -rf `find . -name __pycache__` diff --git a/PLANS.md b/PLANS.md new file mode 100644 index 0000000..c7985df --- /dev/null +++ b/PLANS.md @@ -0,0 +1,152 @@ +# Codex Execution Plans (ExecPlans): + +This document describes the requirements for an execution plan ("ExecPlan"), a design document that a coding agent can follow to deliver a working feature or system change. Treat the reader as a complete beginner to this repository: they have only the current working tree and the single ExecPlan file you provide. There is no memory of prior plans and no external context. + +## How to use ExecPlans and PLANS.md + +When authoring an executable specification (ExecPlan), follow PLANS.md to the letter. If it is not in your context, refresh your memory by reading the entire PLANS.md file. Be thorough in reading (and re-reading) source material to produce an accurate specification. When creating a spec, start from the skeleton and flesh it out as you do your research. + +When implementing an executable specification (ExecPlan), do not prompt the user for "next steps"; simply proceed to the next milestone. Keep all sections up to date, add or split entries in the list at every stopping point to affirmatively state the progress made and next steps. Resolve ambiguities autonomously, and commit frequently. + +When discussing an executable specification (ExecPlan), record decisions in a log in the spec for posterity; it should be unambiguously clear why any change to the specification was made. ExecPlans are living documents, and it should always be possible to restart from only the ExecPlan and no other work. + +When researching a design with challenging requirements or significant unknowns, use milestones to implement proof of concepts, "toy implementations", etc., that allow validating whether the user's proposal is feasible. Read the source code of libraries by finding or acquiring them, research deeply, and include prototypes to guide a fuller implementation. + +## Requirements + +NON-NEGOTIABLE REQUIREMENTS: + +* Every ExecPlan must be fully self-contained. Self-contained means that in its current form it contains all knowledge and instructions needed for a novice to succeed. +* Every ExecPlan is a living document. Contributors are required to revise it as progress is made, as discoveries occur, and as design decisions are finalized. Each revision must remain fully self-contained. +* Every ExecPlan must enable a complete novice to implement the feature end-to-end without prior knowledge of this repo. +* Every ExecPlan must produce a demonstrably working behavior, not merely code changes to "meet a definition". +* Every ExecPlan must define every term of art in plain language or do not use it. + +Purpose and intent come first. Begin by explaining, in a few sentences, why the work matters from a user's perspective: what someone can do after this change that they could not do before, and how to see it working. Then guide the reader through the exact steps to achieve that outcome, including what to edit, what to run, and what they should observe. + +The agent executing your plan can list files, read files, search, run the project, and run tests. It does not know any prior context and cannot infer what you meant from earlier milestones. Repeat any assumption you rely on. Do not point to external blogs or docs; if knowledge is required, embed it in the plan itself in your own words. If an ExecPlan builds upon a prior ExecPlan and that file is checked in, incorporate it by reference. If it is not, you must include all relevant context from that plan. + +## Formatting + +Format and envelope are simple and strict. Each ExecPlan must be one single fenced code block labeled as `md` that begins and ends with triple backticks. Do not nest additional triple-backtick code fences inside; when you need to show commands, transcripts, diffs, or code, present them as indented blocks within that single fence. Use indentation for clarity rather than code fences inside an ExecPlan to avoid prematurely closing the ExecPlan's code fence. Use two newlines after every heading, use # and ## and so on, and correct syntax for ordered and unordered lists. + +When writing an ExecPlan to a Markdown (.md) file where the content of the file is only the single ExecPlan, you should omit the triple backticks. + +Write in plain prose. Prefer sentences over lists. Avoid checklists, tables, and long enumerations unless brevity would obscure meaning. Checklists are permitted only in the `Progress` section, where they are mandatory. Narrative sections must remain prose-first. + +## Guidelines + +Self-containment and plain language are paramount. If you introduce a phrase that is not ordinary English ("daemon", "middleware", "RPC gateway", "filter graph"), define it immediately and remind the reader how it manifests in this repository (for example, by naming the files or commands where it appears). Do not say "as defined previously" or "according to the architecture doc." Include the needed explanation here, even if you repeat yourself. + +Avoid common failure modes. Do not rely on undefined jargon. Do not describe "the letter of a feature" so narrowly that the resulting code compiles but does nothing meaningful. Do not outsource key decisions to the reader. When ambiguity exists, resolve it in the plan itself and explain why you chose that path. Err on the side of over-explaining user-visible effects and under-specifying incidental implementation details. + +Anchor the plan with observable outcomes. State what the user can do after implementation, the commands to run, and the outputs they should see. Acceptance should be phrased as behavior a human can verify ("after starting the server, navigating to http://localhost:8080/health returns HTTP 200 with body OK") rather than internal attributes ("added a HealthCheck struct"). If a change is internal, explain how its impact can still be demonstrated (for example, by running tests that fail before and pass after, and by showing a scenario that uses the new behavior). + +Specify repository context explicitly. Name files with full repository-relative paths, name functions and modules precisely, and describe where new files should be created. If touching multiple areas, include a short orientation paragraph that explains how those parts fit together so a novice can navigate confidently. When running commands, show the working directory and exact command line. When outcomes depend on environment, state the assumptions and provide alternatives when reasonable. + +Be idempotent and safe. Write the steps so they can be run multiple times without causing damage or drift. If a step can fail halfway, include how to retry or adapt. If a migration or destructive operation is necessary, spell out backups or safe fallbacks. Prefer additive, testable changes that can be validated as you go. + +Validation is not optional. Include instructions to run tests, to start the system if applicable, and to observe it doing something useful. Describe comprehensive testing for any new features or capabilities. Include expected outputs and error messages so a novice can tell success from failure. Where possible, show how to prove that the change is effective beyond compilation (for example, through a small end-to-end scenario, a CLI invocation, or an HTTP request/response transcript). State the exact test commands appropriate to the project's toolchain and how to interpret their results. + +Capture evidence. When your steps produce terminal output, short diffs, or logs, include them inside the single fenced block as indented examples. Keep them concise and focused on what proves success. If you need to include a patch, prefer file-scoped diffs or small excerpts that a reader can recreate by following your instructions rather than pasting large blobs. + +## Milestones + +Milestones are narrative, not bureaucracy. If you break the work into milestones, introduce each with a brief paragraph that describes the scope, what will exist at the end of the milestone that did not exist before, the commands to run, and the acceptance you expect to observe. Keep it readable as a story: goal, work, result, proof. Progress and milestones are distinct: milestones tell the story, progress tracks granular work. Both must exist. Never abbreviate a milestone merely for the sake of brevity, do not leave out details that could be crucial to a future implementation. + +Each milestone must be independently verifiable and incrementally implement the overall goal of the execution plan. + +## Living plans and design decisions + +* ExecPlans are living documents. As you make key design decisions, update the plan to record both the decision and the thinking behind it. Record all decisions in the `Decision Log` section. +* ExecPlans must contain and maintain a `Progress` section, a `Surprises & Discoveries` section, a `Decision Log`, and an `Outcomes & Retrospective` section. These are not optional. +* When you discover optimizer behavior, performance tradeoffs, unexpected bugs, or inverse/unapply semantics that shaped your approach, capture those observations in the `Surprises & Discoveries` section with short evidence snippets (test output is ideal). +* If you change course mid-implementation, document why in the `Decision Log` and reflect the implications in `Progress`. Plans are guides for the next contributor as much as checklists for you. +* At completion of a major task or the full plan, write an `Outcomes & Retrospective` entry summarizing what was achieved, what remains, and lessons learned. + +# Prototyping milestones and parallel implementations + +It is acceptable - and often encouraged - to include explicit prototyping milestones when they de-risk a larger change. Examples: adding a low-level operator to a dependency to validate feasibility, or exploring two composition orders while measuring optimizer effects. Keep prototypes additive and testable. Clearly label the scope as "prototyping"; describe how to run and observe results; and state the criteria for promoting or discarding the prototype. + +Prefer additive code changes followed by subtractions that keep tests passing. Parallel implementations (e.g., keeping an adapter alongside an older path during migration) are fine when they reduce risk or enable tests to continue passing during a large migration. Describe how to validate both paths and how to retire one safely with tests. When working with multiple new libraries or feature areas, consider creating spikes that evaluate the feasibility of these features independently of one another, proving that the external library performs as expected and implements the features we need in isolation. + +## Skeleton of a Good ExecPlan + +```md +# + +This ExecPlan is a living document. The sections `Progress`, `Surprises & Discoveries`, `Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work proceeds. + +If PLANS.md file is checked into the repo, reference the path to that file here from the repository root and note that this document must be maintained in accordance with PLANS.md. + +## Purpose / Big Picture + +Explain in a few sentences what someone gains after this change and how they can see it working. State the user-visible behavior you will enable. + +## Progress + +Use a list with checkboxes to summarize granular steps. Every stopping point must be documented here, even if it requires splitting a partially completed task into two ("done" vs. "remaining"). This section must always reflect the actual current state of the work. + +- [x] (2025-10-01 13:00Z) Example completed step. +- [ ] Example incomplete step. +- [ ] Example partially completed step (completed: X; remaining: Y). + +Use timestamps to measure rates of progress. + +## Surprises & Discoveries + +Document unexpected behaviors, bugs, optimizations, or insights discovered during implementation. Provide concise evidence. + +- Observation: ... + Evidence: ... + +## Decision Log + +Record every decision made while working on the plan in the format: + +- Decision: ... + Rationale: ... + Date/Author: ... + +## Outcomes & Retrospective + +Summarize outcomes, gaps, and lessons learned at major milestones or at completion. Compare the result against the original purpose. + +## Context and Orientation + +Describe the current state relevant to this task as if the reader knows nothing. Name the key files and modules by full path. Define any non-obvious term you will use. Do not refer to prior plans. + +## Plan of Work + +Describe, in prose, the sequence of edits and additions. For each edit, name the file and location (function, module) and what to insert or change. Keep it concrete and minimal. + +## Concrete Steps + +State the exact commands to run and where to run them (working directory). When a command generates output, show a short expected transcript so the reader can compare. This section must be updated as work proceeds. + +## Validation and Acceptance + +Describe how to start or exercise the system and what to observe. Phrase acceptance as behavior, with specific inputs and outputs. If tests are involved, say "run and expect passed; the new test fails before the change and passes after". + +## Idempotence and Recovery + +If steps can be repeated safely, say so. If a step is risky, provide a safe retry or rollback path. Keep the environment clean after completion. + +## Artifacts and Notes + +Include the most important transcripts, diffs, or snippets as indented examples. Keep them concise and focused on what proves success. + +## Interfaces and Dependencies + +Be prescriptive. Name the libraries, modules, and services to use and why. Specify the types, traits/interfaces, and function signatures that must exist at the end of the milestone. Prefer stable names and paths such as `crate::module::function` or `package.submodule.Interface`. E.g.: + +In crates/foo/planner.rs, define: + + pub trait Planner { + fn plan(&self, observed: &Observed) -> Vec; + } +``` + +If you follow the guidance above, a single, stateless agent -- or a human novice -- can read your ExecPlan from top to bottom and produce a working, observable result. That is the bar: SELF-CONTAINED, SELF-SUFFICIENT, NOVICE-GUIDING, OUTCOME-FOCUSED. + +When you revise a plan, you must ensure your changes are comprehensively reflected across all sections, including the living document sections, and you must write a note at the bottom of the plan describing the change and the reason why. ExecPlans must describe not just the what but the why for almost everything. diff --git a/README.md b/README.md index 7ba2ea9..71789c1 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The goal is to store emails in a unified format that is easy to translate and to ## Requirements -1. Python 3.+ +1. Python 3.11.13 2. libxml - on OSX install with `xcode-select --install` ## Installation diff --git a/email_parser/__init__.py b/email_parser/__init__.py index b0c4d92..d4fc77e 100644 --- a/email_parser/__init__.py +++ b/email_parser/__init__.py @@ -76,7 +76,7 @@ def get_email_components(self, email_name, locale): def get_email_variants(self, email_name): email = fs.email(self.root_path, email_name, const.DEFAULT_LOCALE) _, placeholders = reader.read(self.root_path, email) - variants = set([name for _, p in placeholders.items() for name in p.variants.keys()]) + variants = {name for _, p in placeholders.items() for name in p.variants.keys()} return list(variants) def delete_email(self, email_name): diff --git a/email_parser/cmd.py b/email_parser/cmd.py index b2aa78d..0505ec2 100644 --- a/email_parser/cmd.py +++ b/email_parser/cmd.py @@ -27,7 +27,7 @@ class ProgressConsoleHandler(logging.StreamHandler): def __init__(self, err_queue, warn_queue, *args, **kwargs): self.err_msgs_queue = err_queue self.warn_msgs_queue = warn_queue - super(ProgressConsoleHandler, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def _store_msg(self, msg, loglevel): if loglevel == logging.ERROR: @@ -60,7 +60,7 @@ def _flush_store(self, stream, msgs, header): stream.write(header) stream.write(self.terminator) for idx, msg in enumerate(msgs): - stream.write('%s. %s' % (idx + 1, msg)) + stream.write(f'{idx + 1}. {msg}') stream.write(self.terminator) def _flush_errors(self, stream): diff --git a/email_parser/fs.py b/email_parser/fs.py index 51f9a32..381e335 100644 --- a/email_parser/fs.py +++ b/email_parser/fs.py @@ -18,10 +18,10 @@ def _parse_params(pattern): params = [p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p] if 'name' not in params: raise MissingPatternParamError( - '{{name}} is a required parameter in the pattern but it is not present in {}'.format(pattern)) + f'{{{{name}}}} is a required parameter in the pattern but it is not present in {pattern}') if 'locale' not in params: raise MissingPatternParamError( - '{{locale}} is a required parameter in the pattern but it is not present in {}'.format(pattern)) + f'{{{{locale}}}} is a required parameter in the pattern but it is not present in {pattern}') return params diff --git a/email_parser/link_shortener.py b/email_parser/link_shortener.py index b05cf41..84519cc 100644 --- a/email_parser/link_shortener.py +++ b/email_parser/link_shortener.py @@ -1,7 +1,7 @@ import requests -class NullShortener(object): +class NullShortener: name = 'null' def __init__(self, config): @@ -11,7 +11,7 @@ def shorten(self, link): return link -class KsShortener(object): +class KsShortener: name = 'keepsafe' url = 'http://4uon.ly/url/' diff --git a/email_parser/markdown_ext.py b/email_parser/markdown_ext.py index e216163..13ad2a1 100644 --- a/email_parser/markdown_ext.py +++ b/email_parser/markdown_ext.py @@ -1,7 +1,8 @@ -from markdown.inlinepatterns import Pattern, ImagePattern, LinkPattern, LINK_RE, IMAGE_LINK_RE +import re + from markdown.blockprocessors import BlockProcessor from markdown.extensions import Extension -import re +from markdown.inlinepatterns import ImageInlineProcessor, LinkInlineProcessor, LINK_RE, IMAGE_LINK_RE from . import const @@ -23,7 +24,7 @@ def run(self, parent, blocks): parent.text = text -class BaseUrlImagePattern(Pattern): +class BaseUrlImageProcessor(ImageInlineProcessor): """ Adds base url to images which have relative path. """ @@ -43,40 +44,48 @@ def __init__(self, images_dir, *args): self.images_dir = images_dir.strip('/') else: self.images_dir = '' - self.image_pattern = ImagePattern(*args) def _is_url(self, text): url = text.strip().strip('/').split(' ')[0] return self.url_pattern.match(url) - def handleMatch(self, m): - if self._is_url(m.group(10)): - image = m.string - else: - image = const.IMAGE_PATTERN.format(m.group(2), self.images_dir, m.group(10).strip('/')) - pattern = re.compile("^(.*?)%s(.*?)$" % self.image_pattern.pattern, re.DOTALL | re.UNICODE) - match = re.match(pattern, ' ' + image + ' ') - el = self.image_pattern.handleMatch(match) + def handleMatch(self, m, data): + el, start, end = super().handleMatch(m, data) + if el is None: + return None, None, None + + src = el.get('src', '') + if src and not self._is_url(src): + src_path = src.strip('/') + if self.images_dir: + src = f'{self.images_dir}/{src_path}' if src_path else self.images_dir + else: + src = src_path + el.set('src', src) + # each markdown image should have default style el.set('style', self.unescape('max-width: 100%;')) - return el + return el, start, end -class NoTrackingLinkPattern(LinkPattern): +class NoTrackingLinkProcessor(LinkInlineProcessor): def __init__(self, *args): super().__init__(*args) - def handleMatch(self, m): - el = super().handleMatch(m) - if el.get('href') and el.get('href').startswith('!'): - el.set('href', el.get('href')[1:]) + def handleMatch(self, m, data): + el, start, end = super().handleMatch(m, data) + if el is None: + return None, None, None + href = el.get('href') + if href and href.startswith('!'): + el.set('href', href[1:]) el.set('clicktracking', 'off') - return el + return el, start, end class InlineTextExtension(Extension): - def extendMarkdown(self, md, md_globals): - md.parser.blockprocessors.add('inline_text', InlineBlockProcessor(md.parser), ']*)/>', r'', html) + + def _serialize_child(self, child): + markup = lxml_etree.tostring(child, encoding='unicode', method='xml', pretty_print=True) + markup = self._normalize_self_closing(markup) + if child.tag == 'p': + markup = self._indent_lines(markup, ' ') + return markup + def _wrap_with_text_direction(self, html): if self.locale in config.rtl_locales: soup = bs4.BeautifulSoup(html, 'html.parser') @@ -107,7 +147,7 @@ def _concat_parts(self, subject, parts, variant): content = _transform_extended_tags(self.template.content) return renderer.render(content, placeholders) except pystache.context.KeyNotFoundError as e: - message = 'template %s for locale %s has missing placeholders: %s' % (self.template.name, self.locale, e) + message = f'template {self.template.name} for locale {self.locale} has missing placeholders: {e}' raise MissingTemplatePlaceholderError(message) from e def render(self, placeholders, variant=None, highlight=None): @@ -118,7 +158,7 @@ def render(self, placeholders, variant=None, highlight=None): return html -class TextRenderer(object): +class TextRenderer: """ Renders email's body as text. """ @@ -138,7 +178,7 @@ def _html_to_text(self, html): href = anchor.get('href') or text # href = self.shortener.shorten(href) if href != text: - anchor.replace_with('{} ({})'.format(text, href)) + anchor.replace_with(f'{text} ({href})') elif href: anchor.replace_with(href) @@ -151,7 +191,7 @@ def _html_to_text(self, html): ordered_lists = soup('ol') for ordered_list in ordered_lists: for idx, element in enumerate(ordered_list('li')): - element.replace_with('%s. %s' % (idx + 1, element.string)) + element.replace_with(f'{idx + 1}. {element.string}') return soup.get_text() @@ -167,7 +207,7 @@ def render(self, placeholders, variant=None): return const.TEXT_EMAIL_PLACEHOLDER_SEPARATOR.join(v for v in filter(bool, parts)) -class SubjectRenderer(object): +class SubjectRenderer: """ Renders email's subject as text. """ @@ -190,7 +230,7 @@ def render(email_locale, template, placeholders, variant=None, highlight=None): try: html = html_renderer.render(placeholders, variant, highlight) except MissingTemplatePlaceholderError as e: - message = 'failed to generate html content for locale: {} with message: {}'.format(email_locale, e) + message = f'failed to generate html content for locale: {email_locale} with message: {e}' raise RenderingError(message) from e return subject, text, html diff --git a/plans/python-311-upgrade.md b/plans/python-311-upgrade.md new file mode 100644 index 0000000..940ab4b --- /dev/null +++ b/plans/python-311-upgrade.md @@ -0,0 +1,140 @@ +# Python 3.11.13 + Pyproject + Markdown 3 Upgrade Plan + +This ExecPlan is a living document. The sections `Progress`, `Surprises & Discoveries`, `Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work proceeds. + +PLANS.md is checked into this repo at `PLANS.md` and this document must be maintained in accordance with it. + +## Purpose / Big Picture + +The goal is to keep ks-email-parser behavior identical while upgrading to Python 3.11.13, moving packaging metadata to `pyproject.toml`, switching the test runner to pynose, and updating dependencies (including lxml v5 and Markdown 3.x). After the change, a developer can install dependencies on Python 3.11.13, run the same CLI, and see the same rendered HTML/text outputs for our custom email syntax. Success is visible by running the test suite and seeing new targeted tests prove the custom markdown and CSS behavior still matches today. + +## Progress + +- [x] 2026-02-03 00:00Z Plan drafted and ExecPlan scaffolding installed (PLANS.md and AGENTS.md). +- [x] 2026-02-04 00:02Z Added baseline tests for inline text and base URL image behavior in `tests/test_markdown_ext.py`, ran `make test`, and confirmed all tests passed on current dependency versions. +- [x] 2026-02-04 02:41Z Added `pyproject.toml`, simplified `setup.py`, synced `requirements.txt`, bumped `.python-version` and `.travis.yml`, and switched Makefile to pynose with Python 3.11 venv creation. +- [x] 2026-02-04 02:41Z Ported `email_parser/markdown_ext.py` to Markdown 3.x InlineProcessor API; updated renderer for lxml 5 + inlinestyler compatibility; added renderer helper tests; `make test` and `make coverage` pass (93 tests, 85% coverage). +- [x] 2026-02-04 02:41Z `/review` checkpoints executed; no blocking issues found beyond preexisting URL handling edge cases. + +## Surprises & Discoveries + +- Observation: `make dev` failed under the default Python 3.14 because lxml 4.9.4 cannot build on that runtime. + Evidence: Build error in lxml C extensions (undeclared _PySet_NextEntry) when using Python 3.14; resolved by creating a Python 3.11 venv before running tests. +- Observation: Upgrading to lxml 5 breaks inlinestyler’s CSSSelector usage and HTML serialization expectations. + Evidence: `CSSSelector.evaluate` missing in lxml 5; output formatting regressed until renderer switched to lxml parsing and XML serialization with placeholder spacing restoration. +- Observation: URL detection still treats protocol-relative URLs as relative, which appears to be preexisting behavior. + Evidence: Review noted `//example.com/img` is not matched by the absolute URL regex and would be prefixed if base_url is set. + +## Decision Log + +- Decision: Keep `setup.cfg` for flake8 and coverage configuration while moving packaging metadata to `pyproject.toml`. + Rationale: Avoid adding new tooling dependencies purely for configuration migration; the existing configs are stable and can remain. + Date/Author: 2026-02-03, Codex +- Decision: Use pynose as the test runner while keeping tests in unittest style. + Rationale: Current tests already use unittest.TestCase and patches, so pynose should run them without structural changes. + Date/Author: 2026-02-03, Codex +- Decision: Use Python 3.11 for local venv creation during the upgrade work. + Rationale: lxml 4.9.4 fails to build under Python 3.14 in the current environment, and Python 3.11 is the target runtime. + Date/Author: 2026-02-04, Codex +- Decision: Add a compatibility shim for `inlinestyler.cssselect.CSSSelector.evaluate` when running on lxml 5. + Rationale: inlinestyler 0.2.x still calls `.evaluate`, which was removed in lxml 5; mapping to `__call__` preserves behavior without forking dependencies. + Date/Author: 2026-02-04, Codex +- Decision: Use lxml HTML parsing and XML serialization in `_inline_css`, plus placeholder spacing restoration for `{{ ... }}`. + Rationale: lxml XML serialization preserves prior whitespace/self-closing formatting and avoids unwanted `%20` encoding in placeholders. + Date/Author: 2026-02-04, Codex + +## Outcomes & Retrospective + +Not started. + +## Context and Orientation + +This repo ships a CLI for rendering email templates with custom Markdown behavior and CSS inlining. Packaging currently lives in `setup.py`, tool config in `setup.cfg`, and runtime pins in `requirements.txt`. Tests live in `tests/` and rely on `unittest`. The custom Markdown behavior is implemented in `email_parser/markdown_ext.py` and consumed by `email_parser/renderer.py` via `markdown.markdown(...)`. The custom behaviors that must not change include: + +Inline text blocks: `[[...]]` should render as raw text without paragraph wrapping, used for placing raw values in HTML attributes. + +Base URL images: relative image links like `![Alt](/path/img.jpg)` should be rewritten to include `config.base_img_path`. + +No-tracking links: links whose href is prefixed with `!` should render with the `!` removed and add `clicktracking="off"`. + +Link locale substitution: `{link_locale}` should be replaced with the normalized locale (including mapping rules in config). + +CSS inlining: template CSS should be inlined in rendered HTML, relying on `inlinestyler` + `cssutils` + `lxml`. + +Markdown 3.x deprecates older `Pattern`/`LinkPattern` APIs, so `email_parser/markdown_ext.py` will need to move to the supported InlineProcessor API to preserve the behaviors above. + +## Plan of Work + +First, lock in baseline behavior before any dependency upgrades. Add new unit tests that directly exercise the custom Markdown and CSS behaviors above so that test failures clearly indicate behavior drift. These tests should call the existing renderer functions with small inputs so the expected HTML/text output is explicit and readable. Run the current test command to ensure the new tests pass on the existing dependency set. + +Next, create `pyproject.toml` with PEP 621 metadata that mirrors `setup.py` and move dependency declarations there. Keep `setup.cfg` for tool configs. Update `.python-version` to 3.11.13 and update CI config to match. Replace pytest references with pynose in the test extras and Makefile. Update dependency pins to Python 3.11 compatible versions, including lxml v5 and Markdown 3.x. Keep `requirements.txt` in sync with `pyproject.toml` runtime pins so installations remain predictable. + +Then, refactor `email_parser/markdown_ext.py` to the Markdown 3.x extension API, keeping all custom behaviors identical. This includes replacing `Pattern`, `LinkPattern`, and `ImagePattern` usage with `InlineProcessor` subclasses and updating `extendMarkdown` signatures. The new tests should detect any behavior drift, so update code until tests pass. Ensure the renderer still uses the same Markdown extensions and returns identical HTML/text outputs. + +Finally, rerun the full test suite under Python 3.11.13 with upgraded dependencies and record results. Add review checkpoints mid-implementation and before handoff, and update this plan as changes land. + +## Concrete Steps + +All commands run from the repo root unless noted. + +Add baseline tests (before upgrades): + + rg -n "markdown_ext|inline|clicktracking|base_url" tests email_parser + # Add tests in tests/ (new file ok) to lock in custom Markdown and CSS behaviors. + make test + +Insert `/review` checkpoint after baseline tests: + + codex exec "/review" + +Add pyproject, dependency pins, and pynose migration: + + # Create pyproject.toml with packaging metadata and dependencies. + # Update .python-version and .travis.yml to 3.11.13. + # Update Makefile and test extras to use pynose. + # Align requirements.txt with runtime pins. + make test + +Insert `/review` checkpoint after packaging and dependency changes: + + codex exec "/review" + +Update Markdown 3.x compatibility: + + # Refactor email_parser/markdown_ext.py to InlineProcessor-based extensions. + # Run tests until new and existing tests pass. + make test + make coverage + +Final `/review` checkpoint before handoff: + + codex exec "/review" + +## Validation and Acceptance + +The change is accepted when all tests pass under Python 3.11.13 with the upgraded dependencies and pynose runner. The newly added tests must verify that inline text `[[...]]`, base URL image rewriting, no-tracking links, locale substitution, and CSS inlining produce the same outputs as before the upgrade. The CLI should still render emails without errors; this can be confirmed by running `ks-email-parser` against `tests/` fixtures and observing that outputs match expected fixtures. + +## Idempotence and Recovery + +All edits are safe to apply multiple times; re-running `make test` should be repeatable. If a dependency upgrade breaks behavior, revert only that dependency pin to the prior known-good version and rerun the test that failed to isolate the cause. If Markdown 3.x changes are too large to land at once, keep a temporary adapter layer inside `email_parser/markdown_ext.py` that supports both old and new APIs while tests are stabilized, then remove the old path once tests pass. + +## Artifacts and Notes + +Keep short test output snippets here as evidence during implementation, for example: + + $ make test + ... + OK (78 tests) + +## Interfaces and Dependencies + +The core extension APIs are in `email_parser/markdown_ext.py`. After the upgrade, this module must expose `inline_text()`, `base_url(base_url)`, and `no_tracking()` and they must return Markdown 3.x compatible `Extension` objects using `InlineProcessor` subclasses. The renderer must continue to call `markdown.markdown(text, extensions=extensions)` in `email_parser/renderer.py`. + +Dependencies should be pinned to versions that support Python 3.11.13, including lxml v5 and Markdown 3.x. Runtime pins must be declared in `pyproject.toml` and `requirements.txt`, and test/dev pins must include pynose, coverage, and flake8 (or whatever the project already uses). If an updated dependency changes behavior, adjust code to preserve behavior rather than downgrading unless there is a clear compatibility failure. + +## Plan Change Notes + +Initial plan created to guide the upgrade and testing work. +2026-02-04: Updated Progress, Surprises, and Decision Log after adding baseline tests and encountering Python 3.14/lxml build failures. +2026-02-04: Marked packaging + markdown upgrades complete, recorded inlinestyler/lxml compatibility decisions, and noted passing tests/coverage. +2026-02-04: Recorded /review completion and documented preexisting URL edge-case note. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..366a46a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "ks-email-parser" +version = "1.0.0" +description = "A command line tool to render HTML and text emails of markdown content." +readme = "README.md" +requires-python = ">=3.11" +license = { text = "Apache" } +authors = [ + { name = "Keepsafe", email = "support@getkeepsafe.com" }, +] +classifiers = [ + "License :: OSI Approved :: BSD License", + "Intended Audience :: Developers", + "Programming Language :: Python", +] +dependencies = [ + "Markdown==3.10.1", + "beautifulsoup4==4.14.3", + "cssutils==2.11.1", + "inlinestyler==0.2.5", + "lxml==5.4.0", + "parse==1.20.2", + "pystache==0.6.8", +] + +[project.urls] +Homepage = "https://github.com/KeepSafe/ks-email-parser" + +[project.scripts] +ks-email-parser = "email_parser.cmd:main" + +[project.optional-dependencies] +tests = [ + "coverage==7.13.3", + "flake8==3.9.2", + "pynose==1.5.5", +] +devtools = [ + "build==1.4.0", + "twine==6.2.0", +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +where = ["."] +exclude = ["tests*"] diff --git a/requirements.txt b/requirements.txt index 11822e8..f3b53a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -Markdown==2.6.11 -beautifulsoup4==4.4.1 -cssutils==1.0.1 -inlinestyler==0.2.1 -lxml==3.5 -pystache==0.5.4 -parse==1.8.2 +Markdown==3.10.1 +beautifulsoup4==4.14.3 +cssutils==2.11.1 +inlinestyler==0.2.5 +lxml==5.4.0 +pystache==0.6.8 +parse==1.20.2 diff --git a/setup.cfg b/setup.cfg index f64a1ca..5534fda 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,12 @@ -[metadata] -description-file = README.md - [flake8] -max-line-length = 120 +max_line_length = 120 ignore = F403, F405, F401 [pep8] -max-line-length = 120 +max_line_length = 120 + +[coverage:run] +branch = True + +[coverage:report] +fail_under = 85 diff --git a/setup.py b/setup.py index 4efd8de..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,49 +1,3 @@ -import os -from setuptools import setup, find_packages - -version = '0.3.2' - -install_requires = [ - 'Markdown < 3', - 'beautifulsoup4 < 5', - 'inlinestyler==0.2.1', - 'pystache < 0.6', - 'parse < 2' -] - -tests_require = [ - 'nose', - 'flake8==2.5.4', - 'coverage', -] - -devtools_require = [ - 'twine', - 'build', -] - - -def read(f): - return open(os.path.join(os.path.dirname(__file__), f)).read().strip() - -setup( - name='ks-email-parser', - version=version, - description=('A command line tool to render HTML and text emails of markdown content.'), - classifiers=[ - 'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python' - ], - author='Keepsafe', - author_email='support@getkeepsafe.com', - url='https://github.com/KeepSafe/ks-email-parser', - license='Apache', - packages=find_packages(), - install_requires=install_requires, - tests_require=tests_require, - extras_require={ - 'tests': tests_require, - 'devtools': devtools_require, - }, - entry_points={'console_scripts': ['ks-email-parser = email_parser.cmd:main']}, - include_package_data=True) +from setuptools import setup +setup() diff --git a/tests/test_fs.py b/tests/test_fs.py index c0c004a..3b67c76 100644 --- a/tests/test_fs.py +++ b/tests/test_fs.py @@ -4,7 +4,7 @@ from email_parser.model import * -class MockPath(object): +class MockPath: def __init__(self, path, is_dir=False, parent='.'): self.path = path self._is_dir = is_dir @@ -29,6 +29,16 @@ def __str__(self): return self.path +class TestUtilities(TestCase): + def test__parse_params_exceptions(self): + with self.assertRaises(MissingPatternParamError): + fs._parse_params('test') + with self.assertRaises(MissingPatternParamError): + fs._parse_params('test {name}') + with self.assertRaises(MissingPatternParamError): + fs._parse_params('test {locale}') + + class TestFs(TestCase): def setUp(self): self.patch_path = patch('email_parser.fs.Path') diff --git a/tests/test_markdown_ext.py b/tests/test_markdown_ext.py new file mode 100644 index 0000000..2740914 --- /dev/null +++ b/tests/test_markdown_ext.py @@ -0,0 +1,25 @@ +from unittest import TestCase + +from email_parser import renderer, config + + +class TestMarkdownExtensions(TestCase): + def setUp(self): + config.init(_base_img_path='images_base') + + def tearDown(self): + config.init() + + def test_inline_text_block(self): + html = renderer._md_to_html('[[#C0D9D9]]') + self.assertEqual('#C0D9D9', html.strip()) + + def test_base_url_image_is_rewritten(self): + html = renderer._md_to_html('![Alt text](/path/to/img.jpg)', base_url='images_base') + self.assertIn('src="images_base/path/to/img.jpg"', html) + self.assertIn('style="max-width: 100%;"', html) + + def test_base_url_image_skips_absolute_url(self): + html = renderer._md_to_html('![Alt text](http://example.com/img.jpg)', base_url='images_base') + self.assertIn('src="http://example.com/img.jpg"', html) + self.assertIn('style="max-width: 100%;"', html) diff --git a/tests/test_renderer.py b/tests/test_renderer.py index 1d0beea..d93a317 100644 --- a/tests/test_renderer.py +++ b/tests/test_renderer.py @@ -199,7 +199,8 @@ def test_rtl_locale(self): placeholders = {'content': Placeholder('content', 'dummy_content')} actual = r.render(placeholders) - self.assertEqual('\n

\n dummy_content\n

\n', actual) + expected = '\n

\n dummy_content\n

\n\n' + self.assertEqual(expected, actual) def test_rtl_two_placeholders(self): email_locale = 'ar' @@ -214,7 +215,7 @@ def test_rtl_two_placeholders(self): actual = r.render(placeholders) expected = '\n
\n

\n dummy_content1\n

\n
\n
\n

\n dummy_content2\n

\n
\ -\n' +\n\n' self.assertEqual(expected, actual) @@ -254,3 +255,41 @@ def test_transform_extended_tags(self): expected = '{{MY_BITMAP}}' result = renderer._transform_extended_tags(content) self.assertEqual(result, expected) + + def test_wrap_with_highlight(self): + template = Template('dummy', [], '', '{{content}}', ['content'], None) + r = renderer.HtmlRenderer(template, self.email_locale) + placeholders = {'content': Placeholder('content', 'dummy_content')} + highlight = {'placeholder': 'content', 'variant': None, 'id': 'hl', 'style': 'color: red'} + + actual = r.render(placeholders, highlight=highlight) + + self.assertIn('id="hl"', actual) + self.assertIn('style="color: red"', actual) + + def test_restore_placeholder_spacing(self): + template = Template('dummy', [], '', '{{content}}', ['content'], None) + r = renderer.HtmlRenderer(template, self.email_locale) + restored = r._restore_placeholder_spacing('') + self.assertIn('{{ GLOBAL_IMG_BUCKET }}', restored) + + def test_indent_lines_helper(self): + self.assertEqual('line', renderer.HtmlRenderer._indent_lines('line', ' ')) + self.assertEqual('line1\n line2\n', renderer.HtmlRenderer._indent_lines('line1\nline2\n', ' ')) + + def test_normalize_self_closing(self): + html = 'Alt text' + self.assertEqual('Alt text', renderer.HtmlRenderer._normalize_self_closing(html)) + + +class TestRendererErrors(TestCase): + def test_render_wraps_missing_placeholder_error(self): + template = Template('dummy', [], '', '{{content}}{{missing}}', + ['content', 'missing'], None) + placeholders = { + 'subject': Placeholder('subject', 'dummy_subject'), + 'content': Placeholder('content', 'dummy_content') + } + + with self.assertRaises(RenderingError): + renderer.render('en', template, placeholders)