11# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX]
2- # Pure Bash + awk YAML reader (no yq) .
3- # Supported YAML shape at the top level:
2+ # Pure Bash + awk YAML reader for simple dotfiles lists (needs GNU awk: the 3-argument match() used below is a gawk extension, not available in BSD awk).
3+ # Supports top-level:
44# key:
55# - item
6- # - item
6+ # - "item with spaces"
77# other: [a, b, "c"]
8- #
9- # Creates:
10- # categories[] : sanitized array names (with optional PREFIX)
11- # orig_keys[] : original YAML keys (aligned with categories[])
12- # one array per key, e.g. key "pip3" -> $pip3 (or $PREFIXpip3)
13- #
14- # Notes:
15- # - This is a pragmatic parser for common dotfiles/package lists.
16- # - It ignores nested structures under items and normalizes simple quotes.
17-
188collect_yaml_lists () {
199 # ---- args ----
2010 if [ $# -lt 1 ]; then
@@ -80,118 +70,107 @@ collect_yaml_lists() {
8070 eval " $assign "
8171 }
8272
83- # ---- parse YAML with awk -> token stream (__KEY__/__ITEM__/__END__) ----
84- # Pragmatic parser: handles top-level "key:" + block-style list and "key: [a, b, 'c']"
85- # Strips comments that are not inside quotes.
73+ # ---- parse YAML with awk (script delivered via a single-quoted heredoc) ----
74+ # Emits a token stream on stdout: __KEY__name / __ITEM__value / __END__
8675 local awk_out
8776 awk_out=" $(
88- awk '
89- function trim(s){ gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
90- function dequote(s){
91- if (s ~ /^".*"$/) { s=substr(s,2,length(s)-2) }
92- else if (s ~ /^' \' ' .*' \' ' $/) { s=substr(s,2,length(s)-2) }
93- return s
94- }
95- function strip_comments(line, i,c,dq,sq,esc,out,len) {
96- dq=0; sq=0; esc=0; out=""; len=length(line)
97- for (i=1; i<=len; i++) {
98- c=substr(line,i,1)
99- if (esc) { out=out c; esc=0; continue }
100- if (c=="\\") { out=out c; esc=1; continue }
101- if (c=="\"" && !sq) { dq=!dq; out=out c; continue }
102- if (c=="' \' ' " && !dq) { sq=!sq; out=out c; continue }
103- if (c=="#" && !dq && !sq) { break } # start of comment
104- out=out c
105- }
106- return out
107- }
108- BEGIN { in_list=0; base_indent=-1; key="" }
109- {
110- line = strip_comments($0)
111- if (line ~ /^[ \t]*$/) next
112- # Flow style: key: [a, b, "c"]
113- if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
114- key = trim(m[2])
115- print "__KEY__" key
116- s = m[3]
117- n = split(s, a, /,/)
118- for (i=1;i<=n;i++){
119- val=trim(a[i]); val=dequote(val)
120- if (val!="") print "__ITEM__" val
121- }
122- print "__END__"
123- in_list=0; key=""; next
124- }
77+ awk -f - " $file " << 'AWK '
78+ function trim(s) { sub(/^[ \t]+/,"",s); sub(/[ \t]+$/,"",s); return s }
79+ function dequote(s) {
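80+ # strip one pair of matching single or double quotes. NOTE(review): the '\'' sequences in the single-quote branch are shell escapes carried over from the inline awk '...' form; inside this quoted heredoc they reach awk literally -- confirm single-quoted items are still dequoted.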
81+ if (s ~ /^".*"$/) return substr(s,2,length(s)-2)
82+ else if (s ~ /^'\''.*'\''$/) return substr(s,2,length(s)-2)
83+ return s
84+ }
85+ function strip_comments(line, i,ch,dq,sq,esc,out,len) {
86+ # remove # outside of quotes; handle \" and \' escapes
87+ dq=0; sq=0; esc=0; out=""; len=length(line)
88+ for (i=1; i<=len; i++) {
89+ ch=substr(line,i,1)
90+ if (esc) { out=out ch; esc=0; continue }
91+ if (ch=="\\") { out=out ch; esc=1; continue }
92+ if (ch=="\"" && !sq) { dq=!dq; out=out ch; continue }
93+ if (ch=="'" && !dq) { sq=!sq; out=out ch; continue }
94+ if (ch=="#" && !dq && !sq) break
95+ out=out ch
96+ }
97+ return out
98+ }
99+ BEGIN { in_list=0; base_indent=-1; key="" }
100+ {
101+ line = strip_comments($0)
102+ if (line ~ /^[ \t]*$/) next
125103
126- # Block style start: key:
127- if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
128- # if we were in a previous list, close it
129- if (in_list==1) { print "__END__"; in_list=0; key="" }
130- key = trim(m[2])
131- base_indent = length(m[1])
132- in_list = 1
133- print "__KEY__" key
134- next
135- }
104+ # Flow: key: [a, b, "c"]
105+ if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
106+ key = trim(m[2])
107+ print "__KEY__" key
108+ s = m[3]
109+ # naive split on every comma: fine for simple quoted/unquoted items, but a quoted item that itself contains a comma is split too
110+ n = split(s, a, /,/)
111+ for (i=1;i<=n;i++){
112+ val=trim(a[i]); val=dequote(val)
113+ if (val!="") print "__ITEM__" val
114+ }
115+ print "__END__"
116+ in_list=0; key=""
117+ next
118+ }
136119
137- # If inside a list, look for items or dedent/new key
138- if (in_list==1) {
139- # New key at dedent/same indent ends current list
140- if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk)) {
141- if (length(mk[1]) <= base_indent) {
142- print "__END__"
143- in_list=0; key=""
144- # Reprocess same line as a new key; emulate tail recursion:
145- # Flow style?
146- if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
147- key = trim(mm[2]); print "__KEY__" key
148- s = mm[3]; n = split(s, a, /,/)
149- for (i=1;i<=n;i++){ val=trim(a[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
150- print "__END__"
151- next
152- }
153- # Block style start
154- if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
155- key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
156- next
157- }
158- }
159- }
160- # List item: - value
161- if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
162- item = trim(mi[1]); item = dequote(item)
163- if (item!="") print "__ITEM__" item
164- next
165- }
166- # Otherwise ignore until dedent/new key
167- next
168- }
120+ # Block list start: key:
121+ if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
122+ if (in_list==1) { print "__END__"; in_list=0; key="" }
123+ key = trim(m[2])
124+ base_indent = length(m[1])
125+ in_list = 1
126+ print "__KEY__" key
127+ next
128+ }
169129
170- # Lines outside list we do not handle (scalars, maps)
130+ if (in_list==1) {
131+ # New key at same-or-less indent ends current list
132+ if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk) && length(mk[1]) <= base_indent) {
133+ print "__END__"
134+ in_list=0; key=""
135+ # Reprocess this line as a new start
136+ if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
137+ key = trim(mm[2])
138+ print "__KEY__" key
139+ s = mm[3]; n = split(s, a2, /,/)
140+ for (i=1;i<=n;i++){ val=trim(a2[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
141+ print "__END__"
171142 next
172143 }
173- END { if (in_list==1) print "__END__" }
174- ' " $file "
144+ if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
145+ key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
146+ next
147+ }
148+ }
149+
150+ # List item: - value
151+ if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
152+ item = trim(mi[1]); item = dequote(item)
153+ if (item!="") print "__ITEM__" item
154+ next
155+ }
156+ }
157+ }
158+ END { if (in_list==1) print "__END__" }
159+ AWK
175160 ) "
176161
177- # ---- consume token stream and create arrays ----
178- # We gather items per key, then assign a real shell array with a unique sanitized name.
162+ # ---- consume tokens and create arrays ----
179163 if [ -z " $awk_out " ]; then
180164 echo " No top-level list categories found in: $file " >&2
181165 return 0
182166 fi
183167
184- # read line-by-line portably (bash & zsh)
185168 local cur_key=" " cur_safe=" " base=" " n=1
186- # temp items store (as a plain list we will quote on assignment)
187- if [ " $is_zsh " -eq 1 ]; then
188- typeset -ga __items
189- fi
169+ if [ " $is_zsh " -eq 1 ]; then typeset -ga __items; fi
190170 __items=()
191171
192172 _finalize_current () {
193173 [ -z " $cur_key " ] && return 0
194- # unique, sanitized var name (with prefix)
195174 cur_safe=" $( _sanitize " $cur_key " ) "
196175 cur_safe=" ${prefix}${cur_safe} "
197176 base=" $cur_safe "
@@ -207,20 +186,14 @@ collect_yaml_lists() {
207186 cur_key=" "
208187 }
209188
210- # iterate tokens
211189 while IFS= read -r line; do
212190 case " $line " in
213191 __KEY__* )
214- # finalize previous (if any) then start new
215192 _finalize_current
216193 cur_key=" ${line# __KEY__} "
217194 ;;
218- __ITEM__* )
219- __items+=(" ${line# __ITEM__} " )
220- ;;
221- __END__)
222- _finalize_current
223- ;;
195+ __ITEM__* ) __items+=(" ${line# __ITEM__} " ) ;;
196+ __END__) _finalize_current ;;
224197 esac
225198 done << EOF
226199$awk_out
0 commit comments