diff --git a/packages/cli/README.md b/packages/cli/README.md index aab4fd8..1aaeab7 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -60,7 +60,7 @@ cat data.toon | toon --decode | `-o, --output ` | Output file path (prints to stdout if omitted) | | `-e, --encode` | Force encode mode (overrides auto-detection) | | `-d, --decode` | Force decode mode (overrides auto-detection) | -| `--delimiter ` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) | +| `--delimiter ` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe), or `auto` | | `--indent ` | Indentation size (default: `2`) | | `--stats` | Show token count estimates and savings (encode only) | | `--no-strict` | Disable strict validation when decoding | @@ -94,6 +94,13 @@ Example output: toon data.json --delimiter "\t" -o output.toon ``` +#### Auto-select delimiter + +```bash +# Let TOON choose the delimiter that avoids extra quoting +toon data.json --delimiter auto -o output.toon +``` + #### Pipe-separated with length markers ```bash diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 289be54..46f13bc 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -94,7 +94,7 @@ export const mainCommand: CommandDef<{ }, delimiter: { type: 'string', - description: 'Delimiter for arrays: comma (,), tab (\\t), or pipe (|)', + description: 'Delimiter for arrays: comma (,), tab (\\t), pipe (|), or auto', default: ',', }, indent: { @@ -142,10 +142,14 @@ export const mainCommand: CommandDef<{ } // Validate delimiter - const delimiter = args.delimiter || DEFAULT_DELIMITER - if (!(Object.values(DELIMITERS)).includes(delimiter as Delimiter)) { - throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`) + const delimiterInput = args.delimiter || DEFAULT_DELIMITER + const delimiterValues = Object.values(DELIMITERS) + if (delimiterInput !== 'auto' && !delimiterValues.includes(delimiterInput as Delimiter)) { + throw new Error(`Invalid delimiter "${delimiterInput}". Valid delimiters are: comma (,), tab (\\t), pipe (|), auto`) } + const delimiterOption = (delimiterInput === 'auto' + ? 'auto' + : delimiterInput) as NonNullable // Validate `keyFolding` const keyFolding = args.keyFolding || 'off' @@ -175,7 +179,7 @@ export const mainCommand: CommandDef<{ await encodeToToon({ input: inputSource, output: outputPath, - delimiter: delimiter as Delimiter, + delimiter: delimiterOption, indent, keyFolding: keyFolding as NonNullable, flattenDepth, diff --git a/packages/toon/src/encode/encoders.ts b/packages/toon/src/encode/encoders.ts index b20f221..b7652a0 100644 --- a/packages/toon/src/encode/encoders.ts +++ b/packages/toon/src/encode/encoders.ts @@ -1,5 +1,5 @@ -import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types' -import { DOT, LIST_ITEM_MARKER } from '../constants' +import type { Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types' +import { DELIMITERS, DOT, LIST_ITEM_MARKER } from '../constants' import { tryFoldKeyChain } from './folding' import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isEmptyObject, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize' import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives' @@ -120,7 +120,8 @@ export function encodeArray( // Primitive array if (isArrayOfPrimitives(value)) { - const arrayLine = encodeInlineArrayLine(value, options.delimiter, key) + const delimiter = resolveDelimiterForPrimitiveArray(value, options) + const arrayLine = encodeInlineArrayLine(value, delimiter, key) writer.push(depth, arrayLine) return } @@ -129,7 +130,8 @@ export function encodeArray( if (isArrayOfArrays(value)) { const allPrimitiveArrays = value.every(arr => isArrayOfPrimitives(arr)) if (allPrimitiveArrays) { - encodeArrayOfArraysAsListItems(key, value, writer, depth, options) + const delimiter = resolveDelimiterForArrayOfArrays(value, options) + encodeArrayOfArraysAsListItems(key, value, writer, depth, delimiter) return } } @@ -138,7 +140,8 @@ export function encodeArray( if (isArrayOfObjects(value)) { const header = extractTabularHeader(value) if (header) { - encodeArrayOfObjectsAsTabular(key, value, header, writer, depth, options) + const delimiter = resolveDelimiterForTabularArray(value, options) + encodeArrayOfObjectsAsTabular(key, value, header, writer, depth, options, delimiter) } else { encodeMixedArrayAsListItems(key, value, writer, depth, options) @@ -159,14 +162,14 @@ export function encodeArrayOfArraysAsListItems( values: readonly JsonArray[], writer: LineWriter, depth: Depth, - options: ResolvedEncodeOptions, + delimiter: Delimiter, ): void { - const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter }) + const header = formatHeader(values.length, { key: prefix, delimiter }) writer.push(depth, header) for (const arr of values) { if (isArrayOfPrimitives(arr)) { - const arrayLine = encodeInlineArrayLine(arr, options.delimiter) + const arrayLine = encodeInlineArrayLine(arr, delimiter) writer.pushListItem(depth + 1, arrayLine) } } @@ -193,11 +196,12 @@ export function encodeArrayOfObjectsAsTabular( writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, + delimiter: Delimiter, ): void { - const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter }) + const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter }) writer.push(depth, `${formattedHeader}`) - writeTabularRows(rows, header, writer, depth + 1, options) + writeTabularRows(rows, header, writer, depth + 1, delimiter) } export function extractTabularHeader(rows: readonly JsonObject[]): string[] | undefined { @@ -245,11 +249,11 @@ function writeTabularRows( header: readonly string[], writer: LineWriter, depth: Depth, - options: ResolvedEncodeOptions, + delimiter: Delimiter, ): void { for (const row of rows) { const values = header.map(key => row[key]) - const joinedValue = encodeAndJoinPrimitives(values as JsonPrimitive[], options.delimiter) + const joinedValue = encodeAndJoinPrimitives(values as JsonPrimitive[], delimiter) writer.push(depth, joinedValue) } } @@ -289,7 +293,8 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept else if (isJsonArray(firstValue)) { if (isArrayOfPrimitives(firstValue)) { // Inline format for primitive arrays - const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey) + const delimiter = resolveDelimiterForPrimitiveArray(firstValue, options) + const arrayPropertyLine = encodeInlineArrayLine(firstValue, delimiter, firstKey) writer.pushListItem(depth, arrayPropertyLine) } else if (isArrayOfObjects(firstValue)) { @@ -297,9 +302,10 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept const header = extractTabularHeader(firstValue) if (header) { // Tabular format for uniform arrays of objects - const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter }) + const delimiter = resolveDelimiterForTabularArray(firstValue, options) + const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter }) writer.pushListItem(depth, formattedHeader) - writeTabularRows(firstValue, header, writer, depth + 1, options) + writeTabularRows(firstValue, header, writer, depth + 1, delimiter) } else { // Fall back to list format for non-uniform arrays of objects @@ -347,7 +353,8 @@ function encodeListItemValue( writer.pushListItem(depth, encodePrimitive(value, options.delimiter)) } else if (isJsonArray(value) && isArrayOfPrimitives(value)) { - const arrayLine = encodeInlineArrayLine(value, options.delimiter) + const delimiter = resolveDelimiterForPrimitiveArray(value, options) + const arrayLine = encodeInlineArrayLine(value, delimiter) writer.pushListItem(depth, arrayLine) } else if (isJsonObject(value)) { @@ -356,3 +363,92 @@ function encodeListItemValue( } // #endregion + +// #region Delimiter resolution helpers + +const AUTO_DELIMITER_PRIORITY: readonly Delimiter[] = [ + DELIMITERS.tab, + DELIMITERS.pipe, + DELIMITERS.comma, +] + +function resolveDelimiterForPrimitiveArray(values: readonly JsonPrimitive[], options: ResolvedEncodeOptions): Delimiter { + const strings = collectStringsFromPrimitives(values) + return selectDelimiter(strings, options) +} + +function resolveDelimiterForArrayOfArrays(values: readonly JsonArray[], options: ResolvedEncodeOptions): Delimiter { + const strings: string[] = [] + + for (const arr of values) { + if (isArrayOfPrimitives(arr)) { + collectStringsFromPrimitives(arr, strings) + } + } + + return selectDelimiter(strings, options) +} + +function resolveDelimiterForTabularArray(rows: readonly JsonObject[], options: ResolvedEncodeOptions): Delimiter { + const strings: string[] = [] + + for (const row of rows) { + for (const value of Object.values(row)) { + if (typeof value === 'string') { + strings.push(value) + } + } + } + + return selectDelimiter(strings, options) +} + +function collectStringsFromPrimitives(values: readonly JsonPrimitive[], target: string[] = []): string[] { + for (const value of values) { + if (typeof value === 'string') { + target.push(value) + } + } + return target +} + +function selectDelimiter(strings: readonly string[], options: ResolvedEncodeOptions): Delimiter { + if (strings.length === 0 || options.delimiterStrategy === 'fixed') { + return options.delimiter + } + + let bestDelimiter = options.delimiter + let bestScore = countDelimiterCollisions(strings, bestDelimiter) + + for (const candidate of AUTO_DELIMITER_PRIORITY) { + if (candidate === bestDelimiter) { + continue + } + + const score = countDelimiterCollisions(strings, candidate) + if (score < bestScore) { + bestScore = score + bestDelimiter = candidate + + if (score === 0) { + break + } + } + } + + return bestDelimiter +} + +function countDelimiterCollisions(strings: readonly string[], delimiter: Delimiter): number { + let collisions = 0 + + for (const value of strings) { + if (value.includes(delimiter)) { + collisions++ + } + } + + return collisions +} + +// #endregion diff --git a/packages/toon/src/index.ts b/packages/toon/src/index.ts index e0b870d..1726276 100644 --- a/packages/toon/src/index.ts +++ b/packages/toon/src/index.ts @@ -85,11 +85,27 @@ export function decode(input: string, options?: DecodeOptions): JsonValue { } function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { + const indent = options?.indent ?? 2 + const keyFolding = options?.keyFolding ?? 'off' + const flattenDepth = options?.flattenDepth ?? Number.POSITIVE_INFINITY + const delimiterOption = options?.delimiter ?? DEFAULT_DELIMITER + + if (delimiterOption === 'auto') { + return { + indent, + delimiter: DEFAULT_DELIMITER, + delimiterStrategy: 'auto', + keyFolding, + flattenDepth, + } + } + return { - indent: options?.indent ?? 2, - delimiter: options?.delimiter ?? DEFAULT_DELIMITER, - keyFolding: options?.keyFolding ?? 'off', - flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY, + indent, + delimiter: delimiterOption, + delimiterStrategy: 'fixed', + keyFolding, + flattenDepth, } } diff --git a/packages/toon/src/types.ts b/packages/toon/src/types.ts index a3fd38c..66073e0 100644 --- a/packages/toon/src/types.ts +++ b/packages/toon/src/types.ts @@ -11,6 +11,8 @@ export type JsonValue = JsonPrimitive | JsonObject | JsonArray // #region Encoder options +export type DelimiterOption = Delimiter | 'auto' + export type { Delimiter, DelimiterKey } export interface EncodeOptions { @@ -23,7 +25,7 @@ export interface EncodeOptions { * Delimiter to use for tabular array rows and inline primitive arrays. * @default DELIMITERS.comma */ - delimiter?: Delimiter + delimiter?: DelimiterOption /** * Enable key folding to collapse single-key wrapper chains. * When set to 'safe', nested objects with single keys are collapsed into dotted paths @@ -40,7 +42,13 @@ export interface EncodeOptions { flattenDepth?: number } -export type ResolvedEncodeOptions = Readonly> +export interface ResolvedEncodeOptions { + readonly indent: number + readonly delimiter: Delimiter + readonly delimiterStrategy: 'fixed' | 'auto' + readonly keyFolding: 'off' | 'safe' + readonly flattenDepth: number +} // #endregion diff --git a/packages/toon/test/encode.test.ts b/packages/toon/test/encode.test.ts index cd38ad9..2863879 100644 --- a/packages/toon/test/encode.test.ts +++ b/packages/toon/test/encode.test.ts @@ -43,11 +43,52 @@ for (const fixtures of fixtureFiles) { }) } +describe('auto delimiter selection', () => { + it('prefers delimiter that avoids quoting for inline arrays', () => { + const result = encode({ + tags: ['foo,bar', 'baz'], + }, { delimiter: 'auto' }) + + expect(result).toBe('tags[2 ]: foo,bar baz') + }) + + it('prefers delimiter that avoids quoting for tabular arrays', () => { + const result = encode({ + rows: [ + { name: 'Alice, Bob', id: 1 }, + { name: 'Charlie', id: 2 }, + ], + }, { delimiter: 'auto' }) + + expect(result).toBe([ + 'rows[2 ]{name id}:', + ' Alice, Bob 1', + ' Charlie 2', + ].join('\n')) + }) +}) + function resolveEncodeOptions(options?: TestCase['options']): ResolvedEncodeOptions { + const indent = options?.indent ?? 2 + const keyFolding = options?.keyFolding ?? 'off' + const flattenDepth = options?.flattenDepth ?? Number.POSITIVE_INFINITY + const delimiterOption = options?.delimiter ?? DEFAULT_DELIMITER + + if (delimiterOption === 'auto') { + return { + indent, + delimiter: DEFAULT_DELIMITER, + delimiterStrategy: 'auto', + keyFolding, + flattenDepth, + } + } + return { - indent: options?.indent ?? 2, - delimiter: options?.delimiter ?? DEFAULT_DELIMITER, - keyFolding: options?.keyFolding ?? 'off', - flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY, + indent, + delimiter: delimiterOption, + delimiterStrategy: 'fixed', + keyFolding, + flattenDepth, } }