Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
* text=auto
examples/a-lot-of-includes/ -diff
examples/a-lot-of-includes/docs/index.md -diff
5,095 changes: 5,095 additions & 0 deletions examples/a-lot-of-includes/docs/index.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "mkdocs-include-markdown-plugin"
version = "7.1.4"
version = "7.1.5"
description = "Mkdocs Markdown includer plugin."
readme = "README.md"
license = "Apache-2.0"
Expand Down
100 changes: 76 additions & 24 deletions src/mkdocs_include_markdown_plugin/directive.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class DirectiveBoolArgument: # noqa: D101


if TYPE_CHECKING: # pragma: no cover
from collections.abc import Iterable
from typing import Callable, Literal, TypedDict

DirectiveBoolArgumentsDict = dict[str, DirectiveBoolArgument]
Expand All @@ -50,18 +51,18 @@ class DirectiveBoolArgument: # noqa: D101
DOUBLE_QUOTED_STR_RE = r'([^"]|(?<=\\)")+'
SINGLE_QUOTED_STR_RE = r"([^']|(?<=\\)')+"

# In the following regular expression, the substrings "$OPENING_TAG"
# and "$CLOSING_TAG" will be replaced by the effective opening and
# closing tags in the `on_config` plugin event.
INCLUDE_TAG_RE = rf'''
(?P<_includer_indent>[ \t\w\\.]*?)$OPENING_TAG
# In the following regular expression, the substrings "\{%", "%\}"
# will be replaced by custom opening and closing tags in the `on_config`
# plugin event if required.
INCLUDE_TAG_RE = r'''
(?P<_includer_indent>[ \t\w\\.]*?)\{%
\s*
include
\s+
(?:"(?P<double_quoted_filename>{DOUBLE_QUOTED_STR_RE})")?(?:'(?P<single_quoted_filename>{SINGLE_QUOTED_STR_RE})')?
(?:"(?P<double_quoted_filename>''' + DOUBLE_QUOTED_STR_RE + r''')")?(?:'(?P<single_quoted_filename>''' + SINGLE_QUOTED_STR_RE + r''')')?
(?P<arguments>.*?)
\s*
$CLOSING_TAG
%\}
''' # noqa: E501

TRUE_FALSE_STR_BOOL = {
Expand Down Expand Up @@ -110,6 +111,7 @@ def str_arg(arg: str) -> re.Pattern[str]:
'heading-offset': functools.partial(arg, 'heading-offset'),
}

INCLUDE_MARKDOWN_DIRECTIVE_ARGS = set(ARGUMENT_REGEXES)
INCLUDE_DIRECTIVE_ARGS = {
key for key in ARGUMENT_REGEXES if key not in (
'rewrite-relative-urls', 'heading-offset', 'comments',
Expand All @@ -121,6 +123,45 @@ def str_arg(arg: str) -> re.Pattern[str]:
)


def _maybe_arguments_iter(arguments_string: str) -> Iterable[str]:
    """Yield the candidate argument names found in a directive's arguments.

    A candidate name is the run of non-whitespace characters that directly
    precedes an ``=`` sign located outside of a quoted value. Quoted values
    (single or double quoted, with backslash escapes) are skipped, so the
    ``=`` signs they contain never produce a name. An empty string is
    yielded when nothing, or whitespace, immediately precedes the ``=``.
    """
    quote = ''  # delimiter of the quoted value being skipped, if any
    escaped = False  # whether the previous character was a live backslash
    value_starts = False  # whether the next character opens a value
    pending = ''  # text accumulated since the last ``=`` sign

    for char in arguments_string:
        if quote:
            # Inside a quoted value nothing is accumulated: only the
            # unescaped closing delimiter is looked for.
            if char == '\\':
                escaped = not escaped
            elif escaped or char != quote:
                escaped = False
            else:
                quote = ''
            continue

        if char == '=':
            # The name is whatever follows the last whitespace character
            # of the accumulated text (all of it when there is none).
            cut = max(pending.rfind(space) for space in string.whitespace)
            yield pending[cut + 1:]
            pending = ''
            value_starts = True
            continue

        if value_starts:
            value_starts = False
            if char in ('"', "'"):
                quote = char
                # The opening delimiter ends up stored twice: once here
                # and once by the unconditional append below.
                pending += char
        pending += char


def warn_invalid_directive_arguments(
arguments_string: str,
directive_lineno: Callable[[], int],
Expand All @@ -130,18 +171,17 @@ def warn_invalid_directive_arguments(
) -> None:
"""Warns about the invalid arguments passed to a directive."""
valid_args = (
INCLUDE_DIRECTIVE_ARGS if directive == 'include'
else set(ARGUMENT_REGEXES)
INCLUDE_DIRECTIVE_ARGS
if directive == 'include'
else INCLUDE_MARKDOWN_DIRECTIVE_ARGS
)
for arg_value in WARN_INVALID_DIRECTIVE_ARGS_REGEX.findall(
arguments_string,
):
if arg_value.split('=', 1)[0] not in valid_args:
for maybe_arg in _maybe_arguments_iter(arguments_string):
if maybe_arg not in valid_args:
location = process.file_lineno_message(
page_src_path, docs_dir, directive_lineno(),
)
logger.warning(
f"Invalid argument '{arg_value}' in"
f"Invalid argument '{maybe_arg}' in"
f" '{directive}' directive at {location}. Ignoring...",
)

Expand All @@ -156,9 +196,9 @@ def parse_filename_argument(
if raw_filename is None:
filename = None
else:
filename = raw_filename.replace("\\'", "'")
filename = raw_filename.replace(r"\'", "'")
else:
filename = raw_filename.replace('\\"', '"')
filename = raw_filename.replace(r'\"', '"')
return filename, raw_filename


Expand All @@ -168,9 +208,9 @@ def parse_string_argument(match: re.Match[str]) -> str | None:
if value is None:
value = match[3]
if value is not None:
value = value.replace("\\'", "'")
value = value.replace(r"\'", "'")
else:
value = value.replace('\\"', '"')
value = value.replace(r'\"', '"')
return value


Expand All @@ -182,12 +222,24 @@ def create_include_tag(
Replaces the default ``{%`` and ``%}`` delimiters of INCLUDE_TAG_RE
by the effective opening and closing tags when they are customized.
"""
return re.compile(
INCLUDE_TAG_RE.replace(' include', f' {tag}', 1).replace(
'$OPENING_TAG', re.escape(opening_tag), 1,
).replace('$CLOSING_TAG', re.escape(closing_tag), 1),
flags=re.VERBOSE | re.DOTALL,
)
pattern = INCLUDE_TAG_RE
if tag != 'include':
pattern = pattern.replace(
' include',
(
' include-markdown' if tag == 'include-markdown'
else f' {re.escape(tag)}'
),
1,
)

if opening_tag != '{%':
pattern = pattern.replace(r'\{%', re.escape(opening_tag), 1)

if closing_tag != '%}':
pattern = pattern.replace(r'%\}', re.escape(closing_tag), 1)

return re.compile(pattern, flags=re.VERBOSE | re.DOTALL)


def parse_bool_options(
Expand Down
100 changes: 65 additions & 35 deletions src/mkdocs_include_markdown_plugin/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,10 @@ def transform_line_by_line_skipping_codeblocks(
markdown: str,
func: Callable[[str], str],
) -> str:
"""Apply a transformation line by line in a Markdown text using a function.
"""Apply a transformation line by line in a Markdown text using a function,.

Skip fenced codeblock lines, where the transformation never is applied.
Skip fenced codeblock lines and empty lines, where the transformation
is never applied.

Indented codeblocks are not taken into account because in practice
this function is only used for transformations of heading prefixes. See
Expand All @@ -263,13 +264,15 @@ def transform_line_by_line_skipping_codeblocks(

lines = []
for line in io.StringIO(markdown):
lstripped_line = line.lstrip()
if not _current_fcodeblock_delimiter:
lstripped_line = line.lstrip()
if lstripped_line.startswith(('```', '~~~')):
_current_fcodeblock_delimiter = lstripped_line[:3]
if lstripped_line.startswith('```'):
_current_fcodeblock_delimiter = '```'
elif lstripped_line.startswith('~~~'):
_current_fcodeblock_delimiter = '~~~'
else:
line = func(line) # noqa: PLW2901
elif line.lstrip().startswith(_current_fcodeblock_delimiter):
elif lstripped_line.startswith(_current_fcodeblock_delimiter):
_current_fcodeblock_delimiter = ''
lines.append(line)

Expand All @@ -287,39 +290,27 @@ def rewrite_relative_urls(
``source_path`` will still work when inserted into a file at
``destination_path``.
"""
from urllib.parse import urlparse, urlunparse

def rewrite_url(url: str) -> str:
if is_url(url):
return url

scheme, netloc, path, params, query, fragment = urlparse(url)

# absolute or mail
if path.startswith('/') or scheme == 'mailto':
if is_url(url) or is_absolute_path(url):
return url

new_path = os.path.relpath(
os.path.join(os.path.dirname(source_path), path),
os.path.join(os.path.dirname(source_path), url),
os.path.dirname(destination_path),
)

# ensure forward slashes are used, on Windows
new_path = new_path.replace('\\', '/').replace('//', '/')

try:
if path[-1] == '/':
if url[-1] == '/':
# the above operation removes a trailing slash,
# so add it back if it was present in the input
new_path += '/'
except IndexError: # pragma: no cover
pass

# ensure that links to the same file are not rewritten
if new_path == '.':
new_path = ''

return urlunparse((scheme, netloc, new_path, params, query, fragment))
return new_path

def found_href(m: re.Match[str], url_group_index: int = -1) -> str:
match_start, match_end = m.span(0)
Expand Down Expand Up @@ -528,27 +519,68 @@ def filter_paths(
return response


def _is_valid_url_scheme_char(c: str) -> bool:
    """Tell whether a character may appear in the scheme part of a URL.

    The accepted characters are the ASCII letters, the ASCII digits and
    the ``+``, ``-`` and ``.`` symbols.
    """
    code = ord(c)
    if code in (ord('+'), ord('-'), ord('.')):
        return True
    if ord('0') <= code <= ord('9'):
        return True
    return ord('A') <= code <= ord('Z') or ord('a') <= code <= ord('z')


def is_url(string: str) -> bool:
"""Determine if a string is an URL."""
if ':' not in string: # fast path
"""Determine if a string is an URL.

The implementation has been adapted from `urllib.parse.urlparse`.
"""
i = string.find(':')
if i <= 1: # noqa: PLR2004 -> exclude C: or D: on Windows
return False
from urllib.parse import urlparse

try:
result = urlparse(string)
return all([result.scheme, result.netloc])
except ValueError: # pragma: no cover
return all(_is_valid_url_scheme_char(string[j]) for j in range(i))
except (IndexError, ValueError): # pragma: no cover
return False


def is_relative_path(string: str) -> bool:
"""Check if a string looks like a relative path."""
return string.startswith(('./', '../'))
try:
return (
string[0] == '.'
and (
string[1] == '/'
or (string[1] == '.' and string[2] == '/')
)
)
except IndexError: # pragma: no cover
return False


def is_absolute_path(string: str) -> bool:
"""Check if a string looks like an absolute path."""
return string.startswith((os.sep, '/'))
try:
return string[0] == '/' or string[0] == os.sep
except IndexError: # pragma: no cover
return False


def read_file(file_path: str, encoding: str) -> str:
Expand Down Expand Up @@ -581,14 +613,12 @@ def read_url(
def safe_os_path_relpath(path: str, start: str) -> str:
"""Return the relative path of a file from a start directory.

Safe version of `os.path.relpath` that catches `ValueError` exceptions
on Windows and returns the original path in case of error.
Safe version of `os.path.relpath` that catches possible `ValueError`
exceptions and returns the original path in case of error.
On Windows, `ValueError` is raised when `path` and `start` are on
different drives.
"""
if os.name != 'nt': # pragma: nt no cover
return os.path.relpath(path, start)
try: # pragma: nt cover
try:
return os.path.relpath(path, start)
except ValueError: # pragma: no cover
return path
Expand Down
2 changes: 1 addition & 1 deletion tests/test_unit/test_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def test_invalid_argument_name(directive, page, tmp_path, plugin, caplog):

assert len(caplog.records) == 1
assert caplog.records[0].msg == (
f"Invalid argument 'invalid-argument=true' in '{directive}'"
f"Invalid argument 'invalid-argument' in '{directive}'"
" directive at includer.md:1. Ignoring..."
)

Expand Down