11# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX]
2- # Pure Bash + awk YAML reader (no yq).
3- # Supported YAML shape at the top level:
2+ # Pure Bash + awk (requires gawk: the parser relies on the 3-argument match()). Supports top-level forms:
43# key:
54# - item
6- # - item
5+ # - " item with spaces"
76# other: [a, b, "c"]
8- #
9- # Creates:
10- # categories[] : sanitized array names (with optional PREFIX)
11- # orig_keys[] : original YAML keys (aligned with categories[])
12- # one array per key, e.g. key "pip3" -> $pip3 (or $PREFIXpip3)
13- #
14- # Notes:
15- # - This is a pragmatic parser for common dotfiles/package lists.
16- # - It ignores nested structures under items and normalizes simple quotes.
17-
187collect_yaml_lists () {
198 # ---- args ----
209 if [ $# -lt 1 ]; then
@@ -49,7 +38,7 @@ collect_yaml_lists() {
4938 return 1
5039 }
5140
52- # ---- shell detection (zsh arrays are 1-based & need typeset ) ----
41+ # ---- shell detection (zsh arrays need typeset and are 1-based) ----
5342 local is_zsh=0
5443 [ -n " ${ZSH_VERSION:- } " ] && is_zsh=1
5544 if [ " $is_zsh " -eq 1 ]; then
@@ -71,7 +60,7 @@ collect_yaml_lists() {
7160 for n in " ${categories[@]} " ; do [ " $n " = " $1 " ] && return 0; done
7261 return 1
7362 }
74- _assign_array () { # _assign_array <varname> <elements...> (portable via printf %q + eval)
63+ _assign_array () { # _assign_array <varname> <elements...>
7564 local v=" $1 "
7665 shift
7766 local assign=" $v =(" x
@@ -80,118 +69,115 @@ collect_yaml_lists() {
8069 eval " $assign "
8170 }
8271
83- # ---- parse YAML with awk -> token stream (__KEY__/__ITEM__/__END__) ----
84- # Pragmatic parser: handles top-level "key:" + block-style list and "key: [a, b, 'c']"
85- # Strips comments that are not inside quotes.
72+ # ---- awk: emit tokens __KEY__/__ITEM__/__END__ (no functions; minimal regex) ----
73+ # - Removes comments only if preceded by space or tab: <space>#...
74+ # - Trims using two sub() calls (no alternation)
75+ # - Flow list split is naive (commas inside quotes not supported)
8676 local awk_out
8777 awk_out=" $(
8878 awk '
89- function trim(s){ gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
90- function dequote(s){
91- if (s ~ /^".*"$/) { s=substr(s,2,length(s)-2) }
92- else if (s ~ /^' \' ' .*' \' ' $/) { s=substr(s,2,length(s)-2) }
93- return s
94- }
95- function strip_comments(line, i,c,dq,sq,esc,out,len) {
96- dq=0; sq=0; esc=0; out=""; len=length(line)
97- for (i=1; i<=len; i++) {
98- c=substr(line,i,1)
99- if (esc) { out=out c; esc=0; continue }
100- if (c=="\\") { out=out c; esc=1; continue }
101- if (c=="\"" && !sq) { dq=!dq; out=out c; continue }
102- if (c=="' \' ' " && !dq) { sq=!sq; out=out c; continue }
103- if (c=="#" && !dq && !sq) { break } # start of comment
104- out=out c
105- }
106- return out
107- }
108- BEGIN { in_list=0; base_indent=-1; key="" }
79+ BEGIN { in_list=0; base_indent=-1 }
10980 {
110- line = strip_comments($0)
111- if (line ~ /^[ \t]*$/) next
112- # Flow style: key: [a, b, "c"]
81+ line=$0
82+ sub(/[ \t]#[^\n]*$/, "", line) # strip trailing comments after space/tab + #
83+ # blank?
84+ tmp=line; sub(/^[ \t]+/, "", tmp); sub(/[ \t]+$/, "", tmp); if (tmp == "") next
85+
86+ # Flow: key: [ ... ]
11387 if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
114- key = trim( m[2])
88+ key= m[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key )
11589 print "__KEY__" key
116- s = m[3]
117- n = split(s, a, /,/)
118- for (i=1;i<=n;i++){
119- val=trim(a[i]); val=dequote(val)
90+ s=m[3]
91+ n=split(s, a, /,/)
92+ for (i=1; i<=n; i++) {
93+ val=a[i]; sub(/^[ \t]+/, "", val); sub(/[ \t]+$/, "", val)
94+ # dequote simple "..." or ' ...'
95+ len=length(val)
96+ if (len>=2) {
97+ first=substr(val,1,1); last=substr(val,len,1)
98+ if ((first=="\"" && last=="\"") || (first=="' " && last==" ' ")) {
99+ val=substr(val,2,len-2)
100+ }
101+ }
120102 if (val!="") print "__ITEM__" val
121103 }
122104 print "__END__"
123- in_list=0; key=""; next
105+ in_list=0; next
124106 }
125107
126- # Block style start: key:
108+ # Block list start: key:
127109 if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
128- # if we were in a previous list, close it
129- if (in_list==1) { print "__END__"; in_list=0; key="" }
130- key = trim(m[2])
131- base_indent = length(m[1])
132- in_list = 1
110+ if (in_list==1) { print "__END__"; in_list=0 }
111+ key=m[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
112+ base_indent=length(m[1]); in_list=1
133113 print "__KEY__" key
134114 next
135115 }
136116
137- # If inside a list, look for items or dedent/new key
117+ # Inside block list?
138118 if (in_list==1) {
139- # New key at dedent/ same indent ends current list
119+ # New key at same-or-less indent ends current list
140120 if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk)) {
141121 if (length(mk[1]) <= base_indent) {
142- print "__END__"
143- in_list=0; key=""
144- # Reprocess same line as a new key; emulate tail recursion:
145- # Flow style?
122+ print "__END__"; in_list=0
123+ # Re-handle this line as new start (flow or block)
146124 if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
147- key = trim(mm[2]); print "__KEY__" key
148- s = mm[3]; n = split(s, a, /,/)
149- for (i=1;i<=n;i++){ val=trim(a[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
125+ key=mm[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
126+ print "__KEY__" key
127+ s=mm[3]; n=split(s, a2, /,/)
128+ for (i=1; i<=n; i++) {
129+ val=a2[i]; sub(/^[ \t]+/, "", val); sub(/[ \t]+$/, "", val)
130+ len=length(val)
131+ if (len>=2) {
132+ first=substr(val,1,1); last=substr(val,len,1)
133+ if ((first=="\"" && last=="\"") || (first=="' " && last==" ' ")) {
134+ val=substr(val,2,len-2)
135+ }
136+ }
137+ if (val!="") print "__ITEM__" val
138+ }
150139 print "__END__"
151140 next
152141 }
153- # Block style start
154142 if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
155- key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
143+ key=mm2[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
144+ base_indent=length(mm2[1]); in_list=1; print "__KEY__" key
156145 next
157146 }
158147 }
159148 }
149+
160150 # List item: - value
161151 if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
162- item = trim(mi[1]); item = dequote(item)
152+ item=mi[1]; sub(/^[ \t]+/, "", item); sub(/[ \t]+$/, "", item)
153+ len=length(item)
154+ if (len>=2) {
155+ first=substr(item,1,1); last=substr(item,len,1)
156+ if ((first=="\"" && last=="\"") || (first=="' " && last==" ' ")) {
157+ item=substr(item,2,len-2)
158+ }
159+ }
163160 if (item!="") print "__ITEM__" item
164161 next
165162 }
166- # Otherwise ignore until dedent/new key
167- next
168163 }
169-
170- # Lines outside list we do not handle (scalars, maps)
171- next
172164 }
173165 END { if (in_list==1) print "__END__" }
174166 ' " $file "
175167 ) "
176168
177- # ---- consume token stream and create arrays ----
178- # We gather items per key, then assign a real shell array with a unique sanitized name.
169+ # ---- consume tokens and create arrays ----
179170 if [ -z " $awk_out " ]; then
180171 echo " No top-level list categories found in: $file " >&2
181172 return 0
182173 fi
183174
184- # read line-by-line portably (bash & zsh)
185175 local cur_key=" " cur_safe=" " base=" " n=1
186- # temp items store (as a plain list we will quote on assignment)
187- if [ " $is_zsh " -eq 1 ]; then
188- typeset -ga __items
189- fi
176+ if [ " $is_zsh " -eq 1 ]; then typeset -ga __items; fi
190177 __items=()
191178
192179 _finalize_current () {
193180 [ -z " $cur_key " ] && return 0
194- # unique, sanitized var name (with prefix)
195181 cur_safe=" $( _sanitize " $cur_key " ) "
196182 cur_safe=" ${prefix}${cur_safe} "
197183 base=" $cur_safe "
@@ -207,20 +193,14 @@ collect_yaml_lists() {
207193 cur_key=" "
208194 }
209195
210- # iterate tokens
211196 while IFS= read -r line; do
212197 case " $line " in
213198 __KEY__* )
214- # finalize previous (if any) then start new
215199 _finalize_current
216200 cur_key=" ${line# __KEY__} "
217201 ;;
218- __ITEM__* )
219- __items+=(" ${line# __ITEM__} " )
220- ;;
221- __END__)
222- _finalize_current
223- ;;
202+ __ITEM__* ) __items+=(" ${line# __ITEM__} " ) ;;
203+ __END__) _finalize_current ;;
224204 esac
225205 done << EOF
226206$awk_out
0 commit comments