Skip to content

Commit 560809d

Browse files
committed
Update awk
1 parent ea45d57 commit 560809d

File tree

2 files changed

+81
-115
lines changed

2 files changed

+81
-115
lines changed

install.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ case "$kernel" in
2727
fi
2828
;;
2929
*"cachyos"*)
30-
sudo pacman -S --needed --noconfirm yq
3130
base_install "nas"
3231
;;
3332
*)

scripts/read_yaml.sh

Lines changed: 81 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,10 @@
11
# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX]
2-
# Pure Bash + awk YAML reader (no yq).
3-
# Supported YAML shape at the top level:
2+
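# Pure Bash + awk YAML reader for simple top-level lists. NOTE: requires GNU awk (gawk) — the parser uses the 3-argument match(s, re, arr) form, which POSIX/BSD awk, mawk and BusyBox awk do not provide.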
3+
# Supports:
44
# key:
55
# - item
6-
# - item
6+
# - "item with spaces"
77
# other: [a, b, "c"]
8-
#
9-
# Creates:
10-
# categories[] : sanitized array names (with optional PREFIX)
11-
# orig_keys[] : original YAML keys (aligned with categories[])
12-
# one array per key, e.g. key "pip3" -> $pip3 (or $PREFIXpip3)
13-
#
14-
# Notes:
15-
# - This is a pragmatic parser for common dotfiles/package lists.
16-
# - It ignores nested structures under items and normalizes simple quotes.
17-
188
collect_yaml_lists() {
199
# ---- args ----
2010
if [ $# -lt 1 ]; then
@@ -80,118 +70,101 @@ collect_yaml_lists() {
8070
eval "$assign"
8171
}
8272

83-
# ---- parse YAML with awk -> token stream (__KEY__/__ITEM__/__END__) ----
84-
# Pragmatic parser: handles top-level "key:" + block-style list and "key: [a, b, 'c']"
85-
# Strips comments that are not inside quotes.
73+
# ---- parse YAML with awk (script via single-quoted heredoc) ----
74+
# Emits token lines: __KEY__name / __ITEM__value / __END__
8675
local awk_out
8776
awk_out="$(
88-
awk '
89-
function trim(s){ gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
90-
function dequote(s){
91-
if (s ~ /^".*"$/) { s=substr(s,2,length(s)-2) }
92-
else if (s ~ /^'\''.*'\''$/) { s=substr(s,2,length(s)-2) }
93-
return s
94-
}
95-
function strip_comments(line, i,c,dq,sq,esc,out,len) {
96-
dq=0; sq=0; esc=0; out=""; len=length(line)
97-
for (i=1; i<=len; i++) {
98-
c=substr(line,i,1)
99-
if (esc) { out=out c; esc=0; continue }
100-
if (c=="\\") { out=out c; esc=1; continue }
101-
if (c=="\"" && !sq) { dq=!dq; out=out c; continue }
102-
if (c=="'\''" && !dq) { sq=!sq; out=out c; continue }
103-
if (c=="#" && !dq && !sq) { break } # start of comment
104-
out=out c
105-
}
106-
return out
107-
}
108-
BEGIN { in_list=0; base_indent=-1; key="" }
109-
{
110-
line = strip_comments($0)
111-
if (line ~ /^[ \t]*$/) next
112-
# Flow style: key: [a, b, "c"]
113-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
114-
key = trim(m[2])
115-
print "__KEY__" key
116-
s = m[3]
117-
n = split(s, a, /,/)
118-
for (i=1;i<=n;i++){
119-
val=trim(a[i]); val=dequote(val)
120-
if (val!="") print "__ITEM__" val
121-
}
122-
print "__END__"
123-
in_list=0; key=""; next
124-
}
77+
awk -f - "$file" <<'AWK'
78+
function trim(s) { sub(/^[ \t]+/,"",s); sub(/[ \t]+$/,"",s); return s }
79+
function dequote(s, first,last,len) {
80+
len = length(s); if (len < 2) return s
81+
first = substr(s,1,1); last = substr(s,len,1)
82+
if ((first == "\"" && last == "\"") || (first == "'" && last == "'")) {
83+
return substr(s,2,len-2)
84+
}
85+
return s
86+
}
87+
function strip_comments(line, i,ch,dq,sq,esc,out,len) {
88+
# Truncate the line at the first # that is outside single/double quotes; a backslash escapes whatever character follows it.
89+
dq=0; sq=0; esc=0; out=""; len=length(line)
90+
for (i=1; i<=len; i++) {
91+
ch=substr(line,i,1)
92+
if (esc) { out=out ch; esc=0; continue }
93+
if (ch=="\\") { out=out ch; esc=1; continue }
94+
if (ch=="\"" && !sq) { dq=!dq; out=out ch; continue }
95+
if (ch=="'" && !dq) { sq=!sq; out=out ch; continue }
96+
if (ch=="#" && !dq && !sq) break
97+
out=out ch
98+
}
99+
return out
100+
}
101+
BEGIN { in_list=0; base_indent=-1; key="" }
102+
{
103+
line = strip_comments($0)
104+
if (line ~ /^[ \t]*$/) next
125105
126-
# Block style start: key:
127-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
128-
# if we were in a previous list, close it
129-
if (in_list==1) { print "__END__"; in_list=0; key="" }
130-
key = trim(m[2])
131-
base_indent = length(m[1])
132-
in_list = 1
133-
print "__KEY__" key
134-
next
135-
}
106+
# Flow: key: [a, b, "c"]
107+
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
108+
key = trim(m[2]); print "__KEY__" key
109+
s = m[3]
110+
n = split(s, a, /,/) # pragmatic split; commas inside quotes not supported
111+
for (i=1;i<=n;i++){
112+
val=trim(a[i]); val=dequote(val)
113+
if (val!="") print "__ITEM__" val
114+
}
115+
print "__END__"
116+
in_list=0; key=""; next
117+
}
136118
137-
# If inside a list, look for items or dedent/new key
138-
if (in_list==1) {
139-
# New key at dedent/same indent ends current list
140-
if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk)) {
141-
if (length(mk[1]) <= base_indent) {
142-
print "__END__"
143-
in_list=0; key=""
144-
# Reprocess same line as a new key; emulate tail recursion:
145-
# Flow style?
146-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
147-
key = trim(mm[2]); print "__KEY__" key
148-
s = mm[3]; n = split(s, a, /,/)
149-
for (i=1;i<=n;i++){ val=trim(a[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
150-
print "__END__"
151-
next
152-
}
153-
# Block style start
154-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
155-
key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
156-
next
157-
}
158-
}
159-
}
160-
# List item: - value
161-
if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
162-
item = trim(mi[1]); item = dequote(item)
163-
if (item!="") print "__ITEM__" item
164-
next
165-
}
166-
# Otherwise ignore until dedent/new key
167-
next
168-
}
119+
# Block list start: key:
120+
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
121+
if (in_list==1) { print "__END__"; in_list=0; key="" }
122+
key = trim(m[2]); base_indent = length(m[1]); in_list = 1
123+
print "__KEY__" key
124+
next
125+
}
169126
170-
# Lines outside list we do not handle (scalars, maps)
127+
if (in_list==1) {
128+
# New key at same-or-less indent ends current list
129+
if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk) && length(mk[1]) <= base_indent) {
130+
print "__END__"; in_list=0; key=""
131+
# Reprocess same line as a new start
132+
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
133+
key = trim(mm[2]); print "__KEY__" key
134+
s = mm[3]; n = split(s, a2, /,/)
135+
for (i=1;i<=n;i++){ val=trim(a2[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
136+
print "__END__"; next
137+
}
138+
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
139+
key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
171140
next
172141
}
173-
END { if (in_list==1) print "__END__" }
174-
' "$file"
142+
}
143+
144+
# List item: - value
145+
if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
146+
item = trim(mi[1]); item = dequote(item)
147+
if (item!="") print "__ITEM__" item
148+
next
149+
}
150+
}
151+
}
152+
END { if (in_list==1) print "__END__" }
153+
AWK
175154
)"
176155

177-
# ---- consume token stream and create arrays ----
178-
# We gather items per key, then assign a real shell array with a unique sanitized name.
156+
# ---- consume tokens and create arrays ----
179157
if [ -z "$awk_out" ]; then
180158
echo "No top-level list categories found in: $file" >&2
181159
return 0
182160
fi
183161

184-
# read line-by-line portably (bash & zsh)
185162
local cur_key="" cur_safe="" base="" n=1
186-
# temp items store (as a plain list we will quote on assignment)
187-
if [ "$is_zsh" -eq 1 ]; then
188-
typeset -ga __items
189-
fi
163+
if [ "$is_zsh" -eq 1 ]; then typeset -ga __items; fi
190164
__items=()
191165

192166
_finalize_current() {
193167
[ -z "$cur_key" ] && return 0
194-
# unique, sanitized var name (with prefix)
195168
cur_safe="$(_sanitize "$cur_key")"
196169
cur_safe="${prefix}${cur_safe}"
197170
base="$cur_safe"
@@ -207,20 +180,14 @@ collect_yaml_lists() {
207180
cur_key=""
208181
}
209182

210-
# iterate tokens
211183
while IFS= read -r line; do
212184
case "$line" in
213185
__KEY__*)
214-
# finalize previous (if any) then start new
215186
_finalize_current
216187
cur_key="${line#__KEY__}"
217188
;;
218-
__ITEM__*)
219-
__items+=("${line#__ITEM__}")
220-
;;
221-
__END__)
222-
_finalize_current
223-
;;
189+
__ITEM__*) __items+=("${line#__ITEM__}") ;;
190+
__END__) _finalize_current ;;
224191
esac
225192
done <<EOF
226193
$awk_out

0 commit comments

Comments
 (0)