diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a869c05..7d0e306 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -35,7 +35,7 @@ jobs:
test:
strategy:
matrix:
- html_parser: [floki, meeseeks]
+ html_parser: [Floki, Meeseeks, LazyHTML]
version:
- otp: 28.0
elixir: 1.19
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b23d4a0..4e1217a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Requires Elixir 1.14 or higher.
* Fixed compiler warnings in `Premailex.HTMLParser.Meeseeks`
* Fixed invalid spec in `Premailex.HTMLInlineStyles.process/3`
+* Added support for `LazyHTML`; `Floki` is now an optional dependency, so at least one HTML parser must be added to your own deps
## v0.3.20 (2025-01-20)
diff --git a/README.md b/README.md
index 1c56139..e7a6b38 100644
--- a/README.md
+++ b/README.md
@@ -89,10 +89,32 @@ end
## HTML parser
-By default, premailex uses [`Floki`](https://github.com/philss/floki) to parse HTML, but you can exchange it for any HTML parser you prefer. [`Meeseeks`](https://github.com/mischov/meeseeks) is supported with the [`Premailex.HTMLParser.Meeseeks`](/lib/premailex/html_parser/meeseeks.ex) module. To use it, add the following to `config.exs`:
+Premailex supports multiple HTML parsers and will automatically select one based on what's available:
+
+1. [`Floki`](https://github.com/philss/floki) (default if available)
+2. [`LazyHTML`](https://github.com/dashbitco/lazy_html)
+3. [`Meeseeks`](https://github.com/mischov/meeseeks)
+
+**At least one parser dependency must be added to your `mix.exs`:**
+
+```elixir
+def deps do
+ [
+ {:premailex, "~> 0.3.20"},
+ # Add at least one of these:
+ {:floki, "~> 0.19"}, # Recommended default
+ # {:meeseeks, "~> 0.11"}, # Alternative option
+ # {:lazy_html, "~> 0.1.8"}, # Alternative option
+ ]
+end
+```
+
+To explicitly configure which parser to use, add to your `config.exs`:
```elixir
config :premailex, html_parser: Premailex.HTMLParser.Meeseeks
+# or
+config :premailex, html_parser: Premailex.HTMLParser.LazyHTML
```
## LICENSE
diff --git a/lib/premailex/html_parser.ex b/lib/premailex/html_parser.ex
index 3391430..04a0fcd 100644
--- a/lib/premailex/html_parser.ex
+++ b/lib/premailex/html_parser.ex
@@ -1,9 +1,14 @@
defmodule Premailex.HTMLParser do
@moduledoc """
Module that provide HTML parsing API using an underlying HTML parser library.
- """
- @default_parser Premailex.HTMLParser.Floki
+ By default, premailex will try to use Floki, then LazyHTML, then Meeseeks
+ (in that order) based on what's available.
+
+ You can explicitly configure which parser to use in your config:
+
+ config :premailex, html_parser: Premailex.HTMLParser.LazyHTML
+ """
@type html_tree :: tuple() | list()
@type selector :: binary()
@@ -23,7 +28,39 @@ defmodule Premailex.HTMLParser do
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Title"]}]}]}
"""
@spec parse(binary()) :: html_tree()
- def parse(html), do: parser().parse(html)
+ def parse(html), do: html_parser().parse(html)
+
+  # Explicit `config :premailex, html_parser: ...` wins; otherwise auto-detect.
+  defp html_parser do
+    parser = Application.get_env(:premailex, :html_parser) || default_html_parser!()
+    if not is_atom(parser), do: raise("Invalid html_parser, got: #{inspect(parser)}")
+    parser
+  end
+
+ defp default_html_parser! do
+ cond do
+ Code.ensure_loaded?(Floki) ->
+ Premailex.HTMLParser.Floki
+
+ Code.ensure_loaded?(LazyHTML) ->
+ Premailex.HTMLParser.LazyHTML
+
+ Code.ensure_loaded?(Meeseeks) ->
+ Premailex.HTMLParser.Meeseeks
+
+ true ->
+ raise """
+ No HTML parser is available. Please add at least one of the following dependencies to your mix.exs:
+
+ - {:floki, "~> 0.19"}
+ - {:meeseeks, "~> 0.11"}
+ - {:lazy_html, "~> 0.1.8"}
+
+ Or explicitly configure a parser:
+ config :premailex, html_parser: Premailex.HTMLParser.Floki
+ """
+ end
+ end
@doc """
Searches an HTML tree for the selector.
@@ -34,7 +71,7 @@ defmodule Premailex.HTMLParser do
[{"h1", [], ["Title"]}]
"""
@spec all(html_tree(), selector()) :: [html_tree()]
- def all(tree, selector), do: parser().all(tree, selector)
+ def all(tree, selector), do: html_parser().all(tree, selector)
@doc """
Filters elements matching the selector from the HTML tree.
@@ -45,7 +82,7 @@ defmodule Premailex.HTMLParser do
[{"html", [], [{"head", [], []}, {"body", [], []}]}]
"""
@spec filter(html_tree(), selector()) :: [html_tree()]
- def filter(tree, selector), do: parser().filter(tree, selector)
+ def filter(tree, selector), do: html_parser().filter(tree, selector)
@doc """
Turns an HTML tree into a string.
@@ -56,7 +93,7 @@ defmodule Premailex.HTMLParser do
"
Title
"
"""
@spec to_string(html_tree()) :: binary()
- def to_string(tree), do: parser().to_string(tree)
+ def to_string(tree), do: html_parser().to_string(tree)
@doc """
Extracts text elements from the HTML tree.
@@ -67,9 +104,5 @@ defmodule Premailex.HTMLParser do
"Title"
"""
@spec text(html_tree()) :: binary()
- def text(tree), do: parser().text(tree)
-
- defp parser do
- Application.get_env(:premailex, :html_parser, @default_parser)
- end
+ def text(tree), do: html_parser().text(tree)
end
diff --git a/lib/premailex/html_parser/floki.ex b/lib/premailex/html_parser/floki.ex
index 4df1103..848be30 100644
--- a/lib/premailex/html_parser/floki.ex
+++ b/lib/premailex/html_parser/floki.ex
@@ -1,59 +1,61 @@
-defmodule Premailex.HTMLParser.Floki do
- @moduledoc false
- alias Premailex.HTMLParser
-
- @behaviour HTMLParser
-
- @impl true
- @doc false
- def parse(html) do
- html = retain_inline_whitespace(html)
- args = [html]
-
- "< 0.24.0"
- |> floki_version_match?()
- |> case do
- true -> apply(Floki, :parse, args)
- false -> apply(Floki, :parse_document, args)
+if Code.ensure_loaded?(Floki) do
+ defmodule Premailex.HTMLParser.Floki do
+ @moduledoc false
+ alias Premailex.HTMLParser
+
+ @behaviour HTMLParser
+
+ @impl true
+ @doc false
+ def parse(html) do
+ html = retain_inline_whitespace(html)
+ args = [html]
+
+ "< 0.24.0"
+ |> floki_version_match?()
+ |> case do
+ true -> apply(Floki, :parse, args)
+ false -> apply(Floki, :parse_document, args)
+ end
+ |> case do
+ {:ok, [html]} -> html
+ {:ok, document} -> document
+ any -> any
+ end
end
- |> case do
- {:ok, [html]} -> html
- {:ok, document} -> document
- any -> any
- end
- end
- defp floki_version_match?(req) do
- case :application.get_key(:floki, :vsn) do
- {:ok, actual} ->
- actual
- |> List.to_string()
- |> Version.match?(req)
+ defp floki_version_match?(req) do
+ case :application.get_key(:floki, :vsn) do
+ {:ok, actual} ->
+ actual
+ |> List.to_string()
+ |> Version.match?(req)
- _any ->
- false
+ _any ->
+ false
+ end
end
- end
- @impl true
- @doc false
- def all(tree, selector), do: Floki.find(tree, selector)
+ @impl true
+ @doc false
+ def all(tree, selector), do: Floki.find(tree, selector)
- @impl true
- @doc false
- def filter(tree, selector), do: Floki.filter_out(tree, selector)
+ @impl true
+ @doc false
+ def filter(tree, selector), do: Floki.filter_out(tree, selector)
- @impl true
- @doc false
- def to_string(tree), do: Floki.raw_html(tree)
+ @impl true
+ @doc false
+ def to_string(tree), do: Floki.raw_html(tree)
- @impl true
- @doc false
- def text(tree), do: Floki.text(tree)
+ @impl true
+ @doc false
+ def text(tree), do: Floki.text(tree)
- # """
- # This is a tempory fix until mochweb (or floki) has been updated
- # to correctly handle whitespace text nodes: https://github.com/mochi/mochiweb/issues/166
- # """
-  defp retain_inline_whitespace(html), do: String.replace(html, ~r/\>[ ]+\</, "> <")
+    # This is a temporary fix until mochiweb (or floki) has been updated to
+    # correctly handle whitespace text nodes, see
+    # https://github.com/mochi/mochiweb/issues/166. Runs of spaces between two
+    # tags are collapsed to a single space before parsing.
+    defp retain_inline_whitespace(html), do: String.replace(html, ~r/\>[ ]+\</, "> <")
+ end
end
diff --git a/lib/premailex/html_parser/lazy_html.ex b/lib/premailex/html_parser/lazy_html.ex
new file mode 100644
index 0000000..fe8e6c3
--- /dev/null
+++ b/lib/premailex/html_parser/lazy_html.ex
@@ -0,0 +1,110 @@
+if Code.ensure_loaded?(LazyHTML) do
+ defmodule Premailex.HTMLParser.LazyHTML do
+ @moduledoc false
+
+ @behaviour Premailex.HTMLParser
+
+ @impl true
+ @doc false
+ def parse(html) do
+ is_document = Regex.match?(~r/ LazyHTML.from_document()
+ |> LazyHTML.to_tree(skip_whitespace_nodes: true)
+ else
+ html
+ |> LazyHTML.from_fragment()
+ |> LazyHTML.to_tree(skip_whitespace_nodes: true)
+ end
+ |> Enum.reject(&empty_text_node?/1)
+
+ case result do
+ [html] -> html
+ html when is_list(html) -> html
+ html -> html
+ end
+ end
+
+ @impl true
+ @doc false
+ def all(tree, selector) do
+ tree
+ |> to_lazy_html()
+ |> LazyHTML.query(selector)
+ |> LazyHTML.to_tree(skip_whitespace_nodes: true)
+ end
+
+ @impl true
+ @doc false
+    def filter(tree, selector) do
+      tree
+      |> normalize_tree()
+      |> filter_tree(selector)
+    end
+
+ @impl true
+ @doc false
+ def to_string(tree) do
+ tree
+ |> to_lazy_html()
+ |> LazyHTML.to_html(skip_whitespace_nodes: true)
+ end
+
+ @impl true
+ @doc false
+ def text(tree) do
+ tree
+ |> to_lazy_html()
+ |> LazyHTML.text()
+ end
+
+ defp to_lazy_html(tree) when is_list(tree) do
+ LazyHTML.from_tree(tree)
+ end
+
+ defp to_lazy_html(tree) when is_tuple(tree) do
+ LazyHTML.from_tree([tree])
+ end
+
+ defp normalize_tree(tree) when is_list(tree), do: tree
+ defp normalize_tree(tree) when is_tuple(tree), do: [tree]
+
+ defp filter_tree(tree_list, selector) when is_list(tree_list) do
+ tree_list
+ |> Enum.map(fn node -> filter_node(node, selector) end)
+ |> Enum.reject(fn node -> is_nil(node) or empty_text_node?(node) end)
+ end
+
+    # A text node counts as empty when nothing but whitespace is left after trimming.
+    defp empty_text_node?(text) when is_binary(text), do: String.trim(text) == ""
+    defp empty_text_node?(_other), do: false
+
+    # Text nodes and comments are never removed by a selector, so both pass
+    # through unchanged. These clauses must come before the element clause
+    # because a `{:comment, text}` tuple is not a `{tag, attrs, children}`
+    # element and would fail to match there.
+    defp filter_node(node, _selector) when is_binary(node), do: node
+    defp filter_node({:comment, _text} = node, _selector), do: node
+
+    # Returns `nil` (dropped by `filter_tree/2`) when the element itself matches
+    # `selector`; otherwise returns the element with its children filtered.
+    defp filter_node({tag, attrs, children}, selector) do
+      # Query a childless copy so that only this element itself, never one of
+      # its descendants, can match the selector.
+      matches =
+        [{tag, attrs, []}]
+        |> LazyHTML.from_tree()
+        |> LazyHTML.query(selector)
+        |> LazyHTML.to_tree()
+
+      if matches == [] do
+        {tag, attrs, filter_tree(children, selector)}
+      else
+        nil
+      end
+    end
+ end
+end
diff --git a/mix.exs b/mix.exs
index 3008e8c..83cbb8e 100644
--- a/mix.exs
+++ b/mix.exs
@@ -17,6 +17,7 @@ defmodule Premailex.Mixfile do
Meeseeks,
Meeseeks.Document,
Meeseeks.Selector.CSS,
+ LazyHTML,
:ssl_verify_hostname
]
],
@@ -39,8 +40,9 @@ defmodule Premailex.Mixfile do
defp deps do
[
- {:floki, "~> 0.19"},
+ {:floki, "~> 0.19", optional: true},
{:meeseeks, "~> 0.11", optional: true},
+ {:lazy_html, "~> 0.1.8", optional: true},
{:certifi, ">= 0.0.0", optional: true},
{:ssl_verify_fun, ">= 0.0.0", optional: true},
diff --git a/mix.lock b/mix.lock
index 5d7bc58..4e897cf 100644
--- a/mix.lock
+++ b/mix.lock
@@ -1,15 +1,19 @@
%{
"bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
"castore": {:hex, :castore, "1.0.18", "5e43ef0ec7d31195dfa5a65a86e6131db999d074179d2ba5a8de11fe14570f55", [:mix], [], "hexpm", "f393e4fe6317829b158fb74d86eb681f737d2fe326aa61ccf6293c4104957e34"},
+ "cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"},
"certifi": {:hex, :certifi, "2.16.0", "a4edfc1d2da3424d478a3271133bf28e0ec5e6fd8c009aab5a4ae980cb165ce9", [:rebar3], [], "hexpm", "8a64f6669d85e9cc0e5086fcf29a5b13de57a13efa23d3582874b9a19303f184"},
"credo": {:hex, :credo, "1.7.17", "f92b6aa5b26301eaa5a35e4d48ebf5aa1e7094ac00ae38f87086c562caf8a22f", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "1eb5645c835f0b6c9b5410f94b5a185057bcf6d62a9c2b476da971cde8749645"},
"dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"},
"earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"},
+ "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
"erlex": {:hex, :erlex, "0.2.8", "cd8116f20f3c0afe376d1e8d1f0ae2452337729f68be016ea544a72f767d9c12", [:mix], [], "hexpm", "9d66ff9fedf69e49dc3fd12831e12a8a37b76f8651dd21cd45fcf5561a8a7590"},
"ex_doc": {:hex, :ex_doc, "0.40.1", "67542e4b6dde74811cfd580e2c0149b78010fd13001fda7cfeb2b2c2ffb1344d", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "bcef0e2d360d93ac19f01a85d58f91752d930c0a30e2681145feea6bd3516e00"},
"file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"},
+ "fine": {:hex, :fine, "0.1.4", "b19a89c1476c7c57afb5f9314aed5960b5bc95d5277de4cb5ee8e1d1616ce379", [:mix], [], "hexpm", "be3324cc454a42d80951cf6023b9954e9ff27c6daa255483b3e8d608670303f5"},
"floki": {:hex, :floki, "0.38.1", "f002ccac94b3bcb21d40d9b34cc2cc9fd88a8311879120330075b5dde657ebee", [:mix], [], "hexpm", "e744bf0db7ee34b2c8b62767f04071107af0516a81144b9a2f73fe0494200e5b"},
"jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
+ "lazy_html": {:hex, :lazy_html, "0.1.10", "ffe42a0b4e70859cf21a33e12a251e0c76c1dff76391609bd56702a0ef5bc429", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.9.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1.0", [hex: :fine, repo: "hexpm", optional: false]}], "hexpm", "50f67e5faa09d45a99c1ddf3fac004f051997877dc8974c5797bb5ccd8e27058"},
"makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
"makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
"makeup_erlang": {:hex, :makeup_erlang, "1.0.3", "4252d5d4098da7415c390e847c814bad3764c94a814a0b4245176215615e1035", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "953297c02582a33411ac6208f2c6e55f0e870df7f80da724ed613f10e6706afd"},
diff --git a/test/premailex/html_parser/lazy_html_test.exs b/test/premailex/html_parser/lazy_html_test.exs
new file mode 100644
index 0000000..7952fdb
--- /dev/null
+++ b/test/premailex/html_parser/lazy_html_test.exs
@@ -0,0 +1,4 @@
+defmodule Premailex.HTMLParser.LazyHTMLTest do
+ use ExUnit.Case
+ doctest Premailex.HTMLParser.LazyHTML
+end
diff --git a/test/test_helper.exs b/test/test_helper.exs
index 91e6ac7..e603e6e 100644
--- a/test/test_helper.exs
+++ b/test/test_helper.exs
@@ -1,4 +1,8 @@
ExUnit.start()
-if System.get_env("HTML_PARSER") == "meeseeks",
- do: Application.put_env(:premailex, :html_parser, Premailex.HTMLParser.Meeseeks)
+html_parser = Module.concat(Premailex.HTMLParser, System.get_env("HTML_PARSER", "Floki"))
+
+Code.ensure_loaded?(html_parser) || raise "HTML parser #{inspect(html_parser)} is not available"
+Application.put_env(:premailex, :html_parser, html_parser)
+
+IO.puts("Testing with #{inspect(html_parser)}")