Skip to content

Commit 9fa719d

Browse files
committed
Refactor
1 parent 539dfc4 commit 9fa719d

File tree

8 files changed

+187
-671
lines changed

8 files changed

+187
-671
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
test:
3636
strategy:
3737
matrix:
38-
html_parser: [floki, meeseeks]
38+
html_parser: [Floki, Meeseeks, LazyHTML, XMERL]
3939
version:
4040
- otp: 28.0
4141
elixir: 1.19

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Requires Elixir 1.14 or higher.
66

77
* Fixed compiler warnings in `Premailex.HTMLParser.Meeseeks`
88
* Fixed invalid spec in `Premailex.HTMLInlineStyles.process/3`
9+
* Added support for `LazyHTML`
910

1011
## v0.3.20 (2025-01-20)
1112

lib/premailex/html_parser.ex

Lines changed: 41 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,14 @@ defmodule Premailex.HTMLParser do
22
@moduledoc """
33
Module that provides an HTML parsing API using an underlying HTML parser library.
44
5-
By default, premailex will try to use Floki, then Meeseeks, then LazyHTML
6-
(in that order) based on what's available. You can also explicitly configure
7-
which parser to use in your config:
5+
By default, premailex will try to use Floki, then LazyHTML, then Meeseeks
6+
(in that order) based on what's available.
87
9-
config :premailex, html_parser: Premailex.HTMLParser.LazyHTML
8+
You can explicitly configure which parser to use in your config:
109
11-
At least one HTML parser dependency must be available:
12-
- `{:floki, "~> 0.19"}` (default if available)
13-
- `{:meeseeks, "~> 0.11"}`
14-
- `{:lazy_html, "~> 0.1.8"}`
10+
config :premailex, html_parser: Premailex.HTMLParser.LazyHTML
1511
"""
1612

17-
@parsers_in_order [
18-
Premailex.HTMLParser.Floki,
19-
Premailex.HTMLParser.Meeseeks,
20-
Premailex.HTMLParser.LazyHTML
21-
]
22-
2313
@type html_tree :: tuple() | list()
2414
@type selector :: binary()
2515

@@ -38,7 +28,39 @@ defmodule Premailex.HTMLParser do
3828
{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Title"]}]}]}
3929
"""
4030
@spec parse(binary()) :: html_tree()
41-
def parse(html), do: parser().parse(html)
31+
def parse(html), do: html_parser().parse(html)
32+
33+
defp html_parser do
34+
case Application.get_env(:premailex, :html_parser) || default_html_parser!() do
35+
mod when is_atom(mod) -> mod
36+
other -> raise "Invalid html_parser, got: #{inspect(other)}"
37+
end
38+
end
39+
40+
defp default_html_parser! do
41+
cond do
42+
Code.ensure_loaded?(Floki) ->
43+
Premailex.HTMLParser.Floki
44+
45+
Code.ensure_loaded?(LazyHTML) ->
46+
Premailex.HTMLParser.LazyHTML
47+
48+
Code.ensure_loaded?(Meeseeks) ->
49+
Premailex.HTMLParser.Meeseeks
50+
51+
true ->
52+
raise """
53+
No HTML parser is available. Please add at least one of the following dependencies to your mix.exs:
54+
55+
- {:floki, "~> 0.19"}
56+
- {:meeseeks, "~> 0.11"}
57+
- {:lazy_html, "~> 0.1.8"}
58+
59+
Or explicitly configure a parser:
60+
config :premailex, html_parser: Premailex.HTMLParser.Floki
61+
"""
62+
end
63+
end
4264

4365
@doc """
4466
Searches an HTML tree for the selector.
@@ -49,7 +71,7 @@ defmodule Premailex.HTMLParser do
4971
[{"h1", [], ["Title"]}]
5072
"""
5173
@spec all(html_tree(), selector()) :: [html_tree()]
52-
def all(tree, selector), do: parser().all(tree, selector)
74+
def all(tree, selector), do: html_parser().all(tree, selector)
5375

5476
@doc """
5577
Filters elements matching the selector from the HTML tree.
@@ -60,7 +82,7 @@ defmodule Premailex.HTMLParser do
6082
[{"html", [], [{"head", [], []}, {"body", [], []}]}]
6183
"""
6284
@spec filter(html_tree(), selector()) :: [html_tree()]
63-
def filter(tree, selector), do: parser().filter(tree, selector)
85+
def filter(tree, selector), do: html_parser().filter(tree, selector)
6486

6587
@doc """
6688
Turns an HTML tree into a string.
@@ -71,7 +93,7 @@ defmodule Premailex.HTMLParser do
7193
"<html><head></head><body><h1>Title</h1></body></html>"
7294
"""
7395
@spec to_string(html_tree()) :: binary()
74-
def to_string(tree), do: parser().to_string(tree)
96+
def to_string(tree), do: html_parser().to_string(tree)
7597

7698
@doc """
7799
Extracts text elements from the HTML tree.
@@ -82,66 +104,5 @@ defmodule Premailex.HTMLParser do
82104
"Title"
83105
"""
84106
@spec text(html_tree()) :: binary()
85-
def text(tree), do: parser().text(tree)
86-
87-
defp parser do
88-
case Application.get_env(:premailex, :html_parser) do
89-
nil ->
90-
# No explicit config, try to find an available parser
91-
find_available_parser()
92-
93-
configured_parser ->
94-
# User explicitly configured a parser, verify it's available
95-
if parser_available?(configured_parser) do
96-
configured_parser
97-
else
98-
raise """
99-
The configured HTML parser #{inspect(configured_parser)} is not available.
100-
101-
Please ensure the corresponding dependency is added to your mix.exs:
102-
- For Floki: {:floki, "~> 0.19"}
103-
- For Meeseeks: {:meeseeks, "~> 0.11"}
104-
- For LazyHTML: {:lazy_html, "~> 0.1.8"}
105-
106-
Or configure a different parser in your config:
107-
config :premailex, html_parser: Premailex.HTMLParser.Floki
108-
"""
109-
end
110-
end
111-
end
112-
113-
# Find the first available parser in order of preference
114-
defp find_available_parser do
115-
case Enum.find(@parsers_in_order, &parser_available?/1) do
116-
nil ->
117-
raise """
118-
No HTML parser is available. Please add at least one of the following dependencies to your mix.exs:
119-
120-
- {:floki, "~> 0.19"}
121-
- {:meeseeks, "~> 0.11"}
122-
- {:lazy_html, "~> 0.1.8"}
123-
124-
Or explicitly configure a parser:
125-
config :premailex, html_parser: Premailex.HTMLParser.Floki
126-
"""
127-
128-
parser ->
129-
parser
130-
end
131-
end
132-
133-
# Check if a parser module is available by verifying its dependencies are loaded
134-
defp parser_available?(Premailex.HTMLParser.Floki) do
135-
Code.ensure_loaded?(Floki)
136-
end
137-
138-
defp parser_available?(Premailex.HTMLParser.Meeseeks) do
139-
Code.ensure_loaded?(Meeseeks)
140-
end
141-
142-
defp parser_available?(Premailex.HTMLParser.LazyHTML) do
143-
Code.ensure_loaded?(LazyHTML)
144-
end
145-
146-
defp parser_available?(_), do: false
107+
def text(tree), do: html_parser().text(tree)
147108
end

lib/premailex/html_parser/floki.ex

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,61 @@
1-
defmodule Premailex.HTMLParser.Floki do
2-
@moduledoc false
3-
alias Premailex.HTMLParser
4-
5-
@behaviour HTMLParser
6-
7-
@impl true
8-
@doc false
9-
def parse(html) do
10-
html = retain_inline_whitespace(html)
11-
args = [html]
12-
13-
"< 0.24.0"
14-
|> floki_version_match?()
15-
|> case do
16-
true -> apply(Floki, :parse, args)
17-
false -> apply(Floki, :parse_document, args)
1+
if Code.ensure_loaded?(Floki) do
2+
defmodule Premailex.HTMLParser.Floki do
3+
@moduledoc false
4+
alias Premailex.HTMLParser
5+
6+
@behaviour HTMLParser
7+
8+
@impl true
9+
@doc false
10+
def parse(html) do
11+
html = retain_inline_whitespace(html)
12+
args = [html]
13+
14+
"< 0.24.0"
15+
|> floki_version_match?()
16+
|> case do
17+
true -> apply(Floki, :parse, args)
18+
false -> apply(Floki, :parse_document, args)
19+
end
20+
|> case do
21+
{:ok, [html]} -> html
22+
{:ok, document} -> document
23+
any -> any
24+
end
1825
end
19-
|> case do
20-
{:ok, [html]} -> html
21-
{:ok, document} -> document
22-
any -> any
23-
end
24-
end
2526

26-
defp floki_version_match?(req) do
27-
case :application.get_key(:floki, :vsn) do
28-
{:ok, actual} ->
29-
actual
30-
|> List.to_string()
31-
|> Version.match?(req)
27+
defp floki_version_match?(req) do
28+
case :application.get_key(:floki, :vsn) do
29+
{:ok, actual} ->
30+
actual
31+
|> List.to_string()
32+
|> Version.match?(req)
3233

33-
_any ->
34-
false
34+
_any ->
35+
false
36+
end
3537
end
36-
end
3738

38-
@impl true
39-
@doc false
40-
def all(tree, selector), do: Floki.find(tree, selector)
39+
@impl true
40+
@doc false
41+
def all(tree, selector), do: Floki.find(tree, selector)
4142

42-
@impl true
43-
@doc false
44-
def filter(tree, selector), do: Floki.filter_out(tree, selector)
43+
@impl true
44+
@doc false
45+
def filter(tree, selector), do: Floki.filter_out(tree, selector)
4546

46-
@impl true
47-
@doc false
48-
def to_string(tree), do: Floki.raw_html(tree)
47+
@impl true
48+
@doc false
49+
def to_string(tree), do: Floki.raw_html(tree)
4950

50-
@impl true
51-
@doc false
52-
def text(tree), do: Floki.text(tree)
51+
@impl true
52+
@doc false
53+
def text(tree), do: Floki.text(tree)
5354

54-
# """
55-
# This is a temporary fix until mochiweb (or floki) has been updated
56-
# to correctly handle whitespace text nodes: https://github.com/mochi/mochiweb/issues/166
57-
# """
58-
defp retain_inline_whitespace(html), do: String.replace(html, ~r/\>[ ]+\</, ">&#32;<")
55+
# """
56+
# This is a temporary fix until mochiweb (or floki) has been updated
57+
# to correctly handle whitespace text nodes: https://github.com/mochi/mochiweb/issues/166
58+
# """
59+
defp retain_inline_whitespace(html), do: String.replace(html, ~r/\>[ ]+\</, ">&#32;<")
60+
end
5961
end

0 commit comments

Comments
 (0)