Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,18 @@
[![Hex.pm](https://img.shields.io/hexpm/v/toon.svg)](https://hex.pm/packages/toon)
[![Documentation](https://img.shields.io/badge/docs-hexdocs-blue.svg)](https://hexdocs.pm/toon)
[![Coverage Status](https://coveralls.io/repos/github/xu-chris/toon_ex/badge.svg?branch=main)](https://coveralls.io/github/xu-chris/toon_ex?branch=main)
[![SPEC v3.0](https://img.shields.io/badge/spec-v3.0-fef3c0?labelColor=1b1b1f)](https://github.com/toon-format/spec)
[![License: MIT](https://img.shields.io/badge/license-MIT-fef3c0?labelColor=1b1b1f)](./LICENSE.md)

**TOON (Token-Oriented Object Notation)** encoder and decoder for Elixir.

TOON is a compact data format optimized for LLM token efficiency, achieving **30-60% token reduction** compared to JSON while maintaining readability.

## 🎯 Specification Compliance

This implementation is tested against the [official TOON specification v1.3.3](https://github.com/toon-format/spec) (2025-10-31) using the official test fixtures.
This implementation is tested against the [official TOON specification v3.0.1](https://github.com/toon-format/spec) (2025-12-05) using the official test fixtures.

**Test Fixtures:** [toon-format/spec@b9c71f7](https://github.com/toon-format/spec/tree/b9c71f72f1d243b17a5c21a56273d556a7a08007)

**Compliance Status:**
- ✅ **100% (306/306 tests passing)**
- ✅ **Decoder: 100% (160/160 tests)**
- ✅ **Encoder: 100% (146/146 tests)**
**Test Fixtures:** [toon-format/spec@f9af6fa](https://github.com/toon-format/spec/tree/v3.0.1)

Tests validate semantic equivalence (both outputs decode to the same data structure), ensuring correctness independent of Elixir 1.19's automatic key sorting.

Expand Down
91 changes: 89 additions & 2 deletions lib/toon/decode/decode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ defmodule Toon.Decode do
alias Toon.Decode.{Options, StructuralParser}
alias Toon.DecodeError

@identifier_segment_pattern ~r/^[A-Za-z_][A-Za-z0-9_]*$/

@typedoc "Decoded TOON value"
@type decoded :: nil | boolean() | binary() | number() | list() | map()

Expand Down Expand Up @@ -111,11 +113,96 @@ defmodule Toon.Decode do
defp do_decode(string, opts) do
# Use structural parser for full TOON support
case StructuralParser.parse(string, opts) do
{:ok, result} ->
result
{:ok, {result, metadata}} ->
maybe_expand_paths(result, metadata, opts)

{:error, error} ->
raise error
end
end

# Path expansion per spec v1.5 section 13.4 - top-level entry point.
#
# When `expand_paths` is "safe" and the decoded root is a map, the map is
# rebuilt key by key. Keys are visited in the order returned by
# `get_ordered_keys/2`; each value first goes through
# `maybe_expand_paths_nested/2`, then `process_key/5` decides whether the key
# is expanded or inserted literally. `metadata.quoted_keys` and the `:strict`
# option (defaulting to `true` when absent from `opts`) are passed along.
defp maybe_expand_paths(result, metadata, %{expand_paths: "safe"} = opts) when is_map(result) do
  quoted = metadata.quoted_keys
  strict? = Map.get(opts, :strict, true)

  for key <- get_ordered_keys(result, metadata.key_order), reduce: %{} do
    expanded ->
      nested_value = maybe_expand_paths_nested(Map.get(result, key), opts)
      process_key(expanded, key, nested_value, quoted, strict?)
  end
end

# Any other option value, or a root that is not a map: metadata is not needed,
# so hand the value straight to the nested walker.
defp maybe_expand_paths(result, _metadata, opts) do
  maybe_expand_paths_nested(result, opts)
end

# Returns the keys of `result` in the order they should be processed.
#
# With an empty `key_order` the map's own key order is used. Otherwise the
# keys follow `key_order`, restricted to keys actually present in `result`.
# Two safeguards are applied on top of that filter:
#   * repeated entries in `key_order` are collapsed to their first occurrence,
#     so a key is never processed twice (a second pass over the same key would
#     be reported as a path conflict in strict mode);
#   * keys of `result` that `key_order` does not mention are appended at the
#     end instead of being silently dropped from the decoded output.
# NOTE(review): whether the parser can actually emit duplicate or incomplete
# `key_order` lists is not visible from here - confirm against StructuralParser.
defp get_ordered_keys(result, []), do: Map.keys(result)

defp get_ordered_keys(result, key_order) do
  ordered =
    key_order
    |> Enum.filter(&Map.has_key?(result, &1))
    |> Enum.uniq()

  ordered ++ (Map.keys(result) -- ordered)
end

# Adds one key/value pair to the accumulator map.
#
# A key that `should_expand?/2` accepts is split on "." and turned into a
# nested map, which is then deep-merged into `acc`. Any other key is inserted
# literally through `insert_key/4`. `strict` is forwarded to whichever path
# is taken so that conflicts are handled there.
defp process_key(acc, key, value, quoted_keys, strict) do
  case should_expand?(key, quoted_keys) do
    true ->
      segments = String.split(key, ".")
      expanded = build_nested(segments, value)
      deep_merge_with_conflict(acc, expanded, strict)

    false ->
      insert_key(acc, key, value, strict)
  end
end

# A key is expanded only when it is syntactically expandable (see
# `expandable_key?/1`) and is not a member of the `quoted_keys` set.
defp should_expand?(key, quoted_keys) do
  if expandable_key?(key) do
    not MapSet.member?(quoted_keys, key)
  else
    false
  end
end

# Inserts a literal (non-expanded) key into the accumulator.
#
#   * Key not present yet: plain insert.
#   * Key present and both the existing and the incoming value are maps: the
#     two are deep-merged with `deep_merge_with_conflict/3`, so the result is
#     the same whether the literal key comes before or after the dotted keys
#     that expand into the same object (object + object is a merge, not a
#     conflict).
#   * Key present with any other combination: this is a conflict. In strict
#     mode a `DecodeError` with reason `:path_conflict` is raised; otherwise
#     the incoming value overwrites the existing one (last write wins).
defp insert_key(acc, key, value, strict) do
  case Map.fetch(acc, key) do
    :error ->
      Map.put(acc, key, value)

    {:ok, existing} when is_map(existing) and is_map(value) ->
      Map.put(acc, key, deep_merge_with_conflict(existing, value, strict))

    {:ok, _conflicting} when strict ->
      raise DecodeError, message: "Path expansion conflict at key '#{key}'", reason: :path_conflict

    {:ok, _conflicting} ->
      Map.put(acc, key, value)
  end
end

# Walker for values below the root (no metadata needed).
#
# In "safe" mode a list is traversed element by element, recursing into
# nested lists. Every other value, including maps, is returned untouched.
# NOTE(review): because maps pass through unchanged, dotted keys inside nested
# objects or inside array elements are not expanded here - confirm this is
# the intended scope.
defp maybe_expand_paths_nested(result, %{expand_paths: "safe"} = opts) when is_list(result) do
  for element <- result, do: maybe_expand_paths_nested(element, opts)
end

defp maybe_expand_paths_nested(result, _opts) do
  result
end

# True when `key` has at least one "." and every dot-separated segment matches
# the IdentifierSegment pattern: [A-Za-z_][A-Za-z0-9_]*
defp expandable_key?(key) do
  case String.split(key, ".") do
    # No dot in the key: nothing to expand.
    [_only_segment] ->
      false

    segments ->
      Enum.all?(segments, fn segment -> segment =~ @identifier_segment_pattern end)
  end
end

# Wraps `value` in one single-key map per path segment, innermost segment
# last: ["a", "b"] and 1 become %{"a" => %{"b" => 1}}. Requires a non-empty
# segment list.
defp build_nested([_ | _] = segments, value) do
  List.foldr(segments, value, fn segment, inner -> %{segment => inner} end)
end

# Merges `map2` into `map1`. Keys present in only one map are kept as they
# are; keys present in both are settled by `resolve_merge/4`, which receives
# the `strict` flag.
defp deep_merge_with_conflict(map1, map2, strict) do
  Map.merge(map1, map2, fn key, left, right ->
    resolve_merge(key, left, right, strict)
  end)
end

# Decides the value for a key that exists on both sides of a merge.
#
#   * both values are maps      -> merge them recursively
#   * exactly one value is a map -> type conflict, see `handle_type_conflict/3`
#   * neither value is a map     -> the right-hand (later) value is kept
defp resolve_merge(key, v1, v2, strict) do
  cond do
    is_map(v1) and is_map(v2) -> deep_merge_with_conflict(v1, v2, strict)
    is_map(v1) or is_map(v2) -> handle_type_conflict(key, v2, strict)
    true -> v2
  end
end

# Resolves a map-versus-non-map collision at `key`.
# Lenient mode (`strict` is false): the incoming value simply replaces the
# existing one.
defp handle_type_conflict(_key, incoming, false) do
  incoming
end

# Strict mode: the collision is reported as a DecodeError with reason
# :path_conflict.
defp handle_type_conflict(key, _incoming, true) do
  raise DecodeError,
    message: "Path expansion conflict at key '#{key}': incompatible types",
    reason: :path_conflict
end
end
5 changes: 5 additions & 0 deletions lib/toon/decode/options.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ defmodule Toon.Decode.Options do
type: :pos_integer,
default: 2,
doc: "Expected indentation size in spaces (for strict mode validation)"
],
expand_paths: [
type: {:in, ["off", "safe"]},
default: "off",
doc: "Path expansion: 'off' | 'safe' (expand unquoted dotted keys)"
]
]

Expand Down
66 changes: 51 additions & 15 deletions lib/toon/decode/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,17 @@ defmodule Toon.Decode.Parser do
unquoted_string =
utf8_string([?a..?z, ?A..?Z, ?0..?9, ?_, ?., ?-], min: 1)

# Quoted key: tag specially to track during path expansion
quoted_key =
quoted_string
|> unwrap_and_tag(:quoted_key)

# Key: quoted or unquoted
key =
choice([
quoted_string,
unquoted_string
quoted_key,
unquoted_string |> unwrap_and_tag(:key)
])
|> unwrap_and_tag(:key)

# Null literal
null_value = string("null") |> replace(nil) |> unwrap_and_tag(:null)
Expand All @@ -51,11 +55,16 @@ defmodule Toon.Decode.Parser do
])
|> unwrap_and_tag(:bool)

# Number: integer or float
# Number: integer, float, or exponent notation (e.g., 1e6, -1E+03, 2.5e-2)
number_value =
optional(ascii_string([?-], 1))
|> concat(ascii_string([?0..?9], min: 1))
|> optional(ascii_string([?.], 1) |> concat(ascii_string([?0..?9], min: 1)))
|> optional(
ascii_string([?e, ?E], 1)
|> concat(optional(ascii_string([?+, ?-], 1)))
|> concat(ascii_string([?0..?9], min: 1))
)
|> reduce({Enum, :join, [""]})
|> map({__MODULE__, :parse_number, []})
|> unwrap_and_tag(:number)
Expand All @@ -79,21 +88,37 @@ defmodule Toon.Decode.Parser do
string_value
])

# Array length marker: [123] or [#123] or [123\t] or [123|]
# Array length marker: [123] or [123\t] or [123|]
# Per TOON spec v2.0, [#N] format is no longer valid
# Per TOON spec Section 6, non-comma delimiters are indicated in the header
array_length =
ignore(string("["))
|> optional(ignore(string("#")))
|> ascii_string([?0..?9], min: 1)
|> optional(ignore(choice([string("\t"), string("|")])))
|> ignore(string("]"))
|> map({String, :to_integer, []})
|> unwrap_and_tag(:array_length)

# Optional whitespace (for tolerating spaces around delimiters)
optional_ws = ignore(repeat(ascii_char([?\s])))

# Delimiter with optional surrounding whitespace
delimiter_with_ws =
optional_ws
|> concat(ignore(@delimiter))
|> concat(optional_ws)

# Empty value (for handling empty tokens like a,,c)
empty_value = lookahead(choice([@delimiter, eos()])) |> replace("") |> unwrap_and_tag(:string)

# Array value: either a primitive or empty string
array_value = choice([primitive_value, empty_value])

# Inline array values: val1,val2,val3 (or tab/pipe separated)
# Supports spaces around delimiters and empty tokens
inline_array_values =
primitive_value
|> repeat(ignore(@delimiter) |> concat(primitive_value))
array_value
|> repeat(delimiter_with_ws |> concat(array_value))
|> tag(:inline_array)

# Key-value pair: key: value
Expand Down Expand Up @@ -134,25 +159,36 @@ defmodule Toon.Decode.Parser do

@doc false
def parse_number(str) when is_binary(str) do
if String.contains?(str, ".") do
String.to_float(str)
else
String.to_integer(str)
cond do
# Exponent notation (e.g., 1e6, -1E+03, 2.5e-2)
String.contains?(str, "e") or String.contains?(str, "E") ->
# Float.parse handles exponent notation
{float, ""} = Float.parse(str)
# If result is a whole number, return as integer
if float == trunc(float), do: trunc(float), else: float

# Decimal float (e.g., 1.5, -2.0)
String.contains?(str, ".") ->
String.to_float(str)

# Integer
true ->
String.to_integer(str)
end
end

@doc false
def make_kv([{:key, key}, {_type, value}]) do
def make_kv([{_key_type, key}, {_type, value}]) do
{key, value}
end

@doc false
def make_empty_array_kv([{:key, key}, {:array_length, _len}]) do
def make_empty_array_kv([{_key_type, key}, {:array_length, _len}]) do
{key, []}
end

@doc false
def make_array_kv([{:key, key}, {:array_length, _len}, {:inline_array, values}]) do
def make_array_kv([{_key_type, key}, {:array_length, _len}, {:inline_array, values}]) do
array_values = Enum.map(values, fn {_type, val} -> val end)
{key, array_values}
end
Expand Down
Loading
Loading