|
1 | | -# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX] |
2 | | -# Creates: |
3 | | -# categories : array of created variable names (sanitized, with optional PREFIX) |
4 | | -# orig_keys : array of original YAML keys (same order as categories) |
5 | | -# One array per YAML key, e.g. "pip3" -> $pip3 (or $PREFIXpip3) |
| 1 | +# collect_yaml_lists: extract YAML list items with pure bash |
| 2 | +# Usage: |
| 3 | +# collect_yaml_lists [-a] <key.path|*> <file1> [file2...] |
| 4 | +# -a also print the dot path before each item (path \t item) |
6 | 5 | # |
7 | | -# Notes: |
8 | | -# - Works in bash (3.2+) and zsh. |
9 | | -# - zsh arrays are 1-based; bash arrays are 0-based. (Printing here handles both.) |
10 | | -# - Detects yq flavor (mikefarah vs kislyuk) automatically. |
| 6 | +# Examples: |
| 7 | +# collect_yaml_lists packages config.yml |
| 8 | +# collect_yaml_lists -a services.* *.yml |
| 9 | +# collect_yaml_lists "*" config.yml # all lists with their items |
11 | 10 |
|
# collect_yaml_lists [-a] <key.path|pattern> <file...>
#
# Print the items of YAML lists using only bash built-ins (no yq).
#
# Arguments:
#   -a        also print the dot path before each item ("path<TAB>item")
#   key.path  dot-joined mapping path of the list to print. It is matched as
#             a shell pattern, so "*" selects every list and "services.*"
#             selects every list below "services".
#   file...   one or more YAML files; the key stack is reset for each file.
#
# Returns 0 on success, 1 if any file could not be read, 2 on usage error.
#
# Limitations (line-based parser, not a full YAML implementation):
#   - keys must match [A-Za-z0-9_.-]+
#   - inline lists ("key: [a, b]") are split on every comma, even inside quotes
#   - multi-line scalars, anchors and flow mappings are ignored
#   - top-level lists without a parent key are ignored
#
# Compatible with bash 3.2+ (no negative array subscripts are used).
collect_yaml_lists() {
  local show_path=0
  if [[ ${1-} == "-a" ]]; then
    show_path=1
    shift
  fi
  # A key path and at least one file are required.
  if (($# < 2)); then
    echo "usage: collect_yaml_lists [-a] <key.path|*> <file...>" >&2
    return 2
  fi
  local want=$1
  shift

  # Parallel stacks describing the current mapping path:
  #   kstack = key names, istack = their indents,
  #   ostack = 1 when the key had no inline value (it opens a nested block).
  local -a kstack=() istack=() ostack=()
  local line indent key rest this_path item content part
  local n file rc=0
  # Regexes are kept in variables so they behave the same on every bash version.
  local key_re='^[[:space:]]*([A-Za-z0-9_.-]+)[[:space:]]*:[[:space:]]*(.*)$'
  # A list item is a dash followed by whitespace or end of line ("---" is not one).
  local item_re='^[[:space:]]*-([[:space:]]+(.*))?$'

  # ---- helpers (pure bash; visible variables are read via dynamic scoping) ----

  # Strip leading spaces.
  _ltrim() { printf '%s' "${1#"${1%%[! ]*}"}"; }

  # Strip trailing spaces.
  _rtrim() {
    local s=$1
    printf '%s' "${s%"${s##*[! ]}"}"
  }

  _trim() { _rtrim "$(_ltrim "$1")"; }

  # Remove ONE layer of matching surrounding quotes, if present.
  _unquote() {
    local s=$1
    if [[ $s == \"*\" || $s == \'*\' ]]; then
      s=${s:1:${#s}-2}
    fi
    printf '%s' "$s"
  }

  # Number of leading spaces.
  _indent() {
    local lead=${1%%[! ]*}
    printf '%d' "${#lead}"
  }

  # Drop a trailing comment: a '#' at the start of the line or preceded by a
  # space, unless it sits inside a quoted scalar. A quote only opens a scalar
  # when it follows a space, '[' or ',' so apostrophes inside words are ignored.
  _strip_comment() {
    local s=$1 ch prev=' ' quote='' i
    for ((i = 0; i < ${#s}; i++)); do
      ch=${s:i:1}
      if [[ -n $quote ]]; then
        if [[ $quote == '"' && $ch == "\\" ]]; then
          i=$((i + 1)) # skip the escaped character
        elif [[ $ch == "$quote" ]]; then
          if [[ $quote == "'" && ${s:i+1:1} == "'" ]]; then
            i=$((i + 1)) # '' is an escaped quote inside single quotes
          else
            quote=''
          fi
        fi
      elif [[ $ch == '#' && $prev == ' ' ]]; then
        break
      elif [[ $ch == '"' || $ch == "'" ]]; then
        if [[ $prev == ' ' || $prev == '[' || $prev == ',' ]]; then
          quote=$ch
        fi
      fi
      prev=$ch
    done
    _rtrim "${s:0:i}"
  }

  # Print one item if its path matches the requested pattern.
  _emit_item() {
    local path=$1 val=$2
    # Unquoted on purpose: $want is a shell pattern ("*", "services.*", ...).
    # shellcheck disable=SC2053
    [[ $path == $want ]] || return 0
    if ((show_path)); then
      printf '%s\t%s\n' "$path" "$val"
    else
      printf '%s\n' "$val"
    fi
  }

  # Join the first <depth> stack entries with dots.
  _join_path() {
    local depth=$1 p='' i
    for ((i = 0; i < depth; i++)); do
      p=${p:+$p.}${kstack[i]}
    done
    printf '%s' "$p"
  }

  # Dot path of the whole current stack.
  _current_path() { _join_path "${#kstack[@]}"; }

  # Dot path of the mapping key that owns a list item at the given indent:
  # the nearest key that is shallower, or at the SAME indent when that key
  # opened a block ("key:" followed by "- item" in the same column is valid
  # YAML). The stacks are not modified.
  _parent_path_for_indent() {
    local ind=$1 depth=${#kstack[@]} top
    while ((depth > 0)); do
      top=$((depth - 1))
      ((istack[top] < ind)) && break
      ((istack[top] == ind && ostack[top])) && break
      depth=$top
    done
    _join_path "$depth"
  }

  # ---- process files ----
  for file in "$@"; do
    if [[ ! -r $file || -d $file ]]; then
      echo "collect_yaml_lists: cannot read: $file" >&2
      rc=1
      continue
    fi
    # Each file starts with an empty mapping path.
    kstack=() istack=() ostack=()

    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n $line ]]; do
      line=${line%$'\r'} # tolerate CRLF files
      line=$(_strip_comment "$line")
      [[ -n $line ]] || continue

      # Document start/end markers begin a fresh mapping path.
      if [[ $line == '---' || $line == '--- '* || $line == '...' ]]; then
        kstack=() istack=() ostack=()
        continue
      fi

      indent=$(_indent "$line")

      # KEY: VALUE or KEY:
      if [[ $line =~ $key_re ]]; then
        key=${BASH_REMATCH[1]}
        rest=${BASH_REMATCH[2]}

        # Pop keys at the same or deeper indent, then push this key.
        n=${#kstack[@]}
        while ((n > 0 && istack[n - 1] >= indent)); do
          n=$((n - 1))
          unset "kstack[$n]" "istack[$n]" "ostack[$n]"
        done
        kstack[n]=$key
        istack[n]=$indent
        if [[ -z $rest ]]; then ostack[n]=1; else ostack[n]=0; fi

        # Inline list: key: [a, b, "c d"]
        if [[ $rest == \[*\] ]]; then
          content=${rest#\[}
          content=${content%\]}
          this_path=$(_current_path)
          # Naive comma split: quoted items must not contain commas.
          while :; do
            case "$content" in
              *","*)
                part=${content%%,*}
                content=${content#*,}
                ;;
              *)
                part=$content
                content=
                ;;
            esac
            part=$(_trim "$part")
            if [[ -n $part ]]; then
              _emit_item "$this_path" "$(_unquote "$part")"
            fi
            [[ -z $content ]] && break
          done
        fi
        continue
      fi

      # List item: "- value"
      if [[ $line =~ $item_re ]]; then
        item=$(_unquote "$(_trim "${BASH_REMATCH[2]}")")
        this_path=$(_parent_path_for_indent "$indent")
        # Items without a parent key (top-level bare lists) are ignored.
        if [[ -n $this_path ]]; then
          _emit_item "$this_path" "$item"
        fi
        continue
      fi

      # Anything else is ignored (scalars, multi-line blocks, etc.)
    done <"$file"
  done

  return "$rc"
}
0 commit comments