Skip to content

Commit f2b0387

Browse files
committed
Update awk
1 parent ea45d57 commit f2b0387

File tree

2 files changed

+66
-87
lines changed

2 files changed

+66
-87
lines changed

install.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ case "$kernel" in
2727
fi
2828
;;
2929
*"cachyos"*)
30-
sudo pacman -S --needed --noconfirm yq
3130
base_install "nas"
3231
;;
3332
*)

scripts/read_yaml.sh

Lines changed: 66 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,9 @@
11
# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX]
2-
# Pure Bash + awk YAML reader (no yq).
3-
# Supported YAML shape at the top level:
2+
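# Pure Bash + awk (no yq). NOTE: the match(line, regex, array) calls used here are a gawk extension, so gawk is required rather than any POSIX awk. Supports top-level forms: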
43
# key:
54
# - item
6-
# - item
5+
# - "item with spaces"
76
# other: [a, b, "c"]
8-
#
9-
# Creates:
10-
# categories[] : sanitized array names (with optional PREFIX)
11-
# orig_keys[] : original YAML keys (aligned with categories[])
12-
# one array per key, e.g. key "pip3" -> $pip3 (or $PREFIXpip3)
13-
#
14-
# Notes:
15-
# - This is a pragmatic parser for common dotfiles/package lists.
16-
# - It ignores nested structures under items and normalizes simple quotes.
17-
187
collect_yaml_lists() {
198
# ---- args ----
209
if [ $# -lt 1 ]; then
@@ -49,7 +38,7 @@ collect_yaml_lists() {
4938
return 1
5039
}
5140

52-
# ---- shell detection (zsh arrays are 1-based & need typeset) ----
41+
# ---- shell detection (zsh arrays need typeset and are 1-based) ----
5342
local is_zsh=0
5443
[ -n "${ZSH_VERSION:-}" ] && is_zsh=1
5544
if [ "$is_zsh" -eq 1 ]; then
@@ -71,7 +60,7 @@ collect_yaml_lists() {
7160
for n in "${categories[@]}"; do [ "$n" = "$1" ] && return 0; done
7261
return 1
7362
}
74-
_assign_array() { # _assign_array <varname> <elements...> (portable via printf %q + eval)
63+
_assign_array() { # _assign_array <varname> <elements...>
7564
local v="$1"
7665
shift
7766
local assign="$v=(" x
@@ -80,118 +69,115 @@ collect_yaml_lists() {
8069
eval "$assign"
8170
}
8271

83-
# ---- parse YAML with awk -> token stream (__KEY__/__ITEM__/__END__) ----
84-
# Pragmatic parser: handles top-level "key:" + block-style list and "key: [a, b, 'c']"
85-
# Strips comments that are not inside quotes.
72+
# ---- awk: emit tokens __KEY__/__ITEM__/__END__ (no functions; minimal regex) ----
73+
# - Removes comments only if preceded by space or tab: <space>#... (quote-unaware: a " #" inside a quoted value is cut as well)
74+
# - Trims using two sub() calls (no alternation)
75+
# - Flow list split is naive (commas inside quotes not supported)
8676
local awk_out
8777
awk_out="$(
8878
awk '
89-
function trim(s){ gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
90-
function dequote(s){
91-
if (s ~ /^".*"$/) { s=substr(s,2,length(s)-2) }
92-
else if (s ~ /^'\''.*'\''$/) { s=substr(s,2,length(s)-2) }
93-
return s
94-
}
95-
function strip_comments(line, i,c,dq,sq,esc,out,len) {
96-
dq=0; sq=0; esc=0; out=""; len=length(line)
97-
for (i=1; i<=len; i++) {
98-
c=substr(line,i,1)
99-
if (esc) { out=out c; esc=0; continue }
100-
if (c=="\\") { out=out c; esc=1; continue }
101-
if (c=="\"" && !sq) { dq=!dq; out=out c; continue }
102-
if (c=="'\''" && !dq) { sq=!sq; out=out c; continue }
103-
if (c=="#" && !dq && !sq) { break } # start of comment
104-
out=out c
105-
}
106-
return out
107-
}
108-
BEGIN { in_list=0; base_indent=-1; key="" }
79+
BEGIN { in_list=0; base_indent=-1 }
10980
{
110-
line = strip_comments($0)
111-
if (line ~ /^[ \t]*$/) next
112-
# Flow style: key: [a, b, "c"]
81+
line=$0
82+
sub(/[ \t]#[^\n]*$/, "", line) # strip trailing comments after space/tab + #
83+
# blank?
84+
tmp=line; sub(/^[ \t]+/, "", tmp); sub(/[ \t]+$/, "", tmp); if (tmp == "") next
85+
86+
# Flow: key: [ ... ]
11387
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
114-
key = trim(m[2])
88+
key=m[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
11589
print "__KEY__" key
116-
s = m[3]
117-
n = split(s, a, /,/)
118-
for (i=1;i<=n;i++){
119-
val=trim(a[i]); val=dequote(val)
90+
s=m[3]
91+
n=split(s, a, /,/)
92+
for (i=1; i<=n; i++) {
93+
val=a[i]; sub(/^[ \t]+/, "", val); sub(/[ \t]+$/, "", val)
94+
# dequote simple "..." or '...'
95+
len=length(val)
96+
if (len>=2) {
97+
first=substr(val,1,1); last=substr(val,len,1)
98+
if ((first=="\"" && last=="\"") || (first=="'" && last=="'")) {
99+
val=substr(val,2,len-2)
100+
}
101+
}
120102
if (val!="") print "__ITEM__" val
121103
}
122104
print "__END__"
123-
in_list=0; key=""; next
105+
in_list=0; next
124106
}
125107
126-
# Block style start: key:
108+
# Block list start: key:
127109
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
128-
# if we were in a previous list, close it
129-
if (in_list==1) { print "__END__"; in_list=0; key="" }
130-
key = trim(m[2])
131-
base_indent = length(m[1])
132-
in_list = 1
110+
if (in_list==1) { print "__END__"; in_list=0 }
111+
key=m[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
112+
base_indent=length(m[1]); in_list=1
133113
print "__KEY__" key
134114
next
135115
}
136116
137-
# If inside a list, look for items or dedent/new key
117+
# Inside block list?
138118
if (in_list==1) {
139-
# New key at dedent/same indent ends current list
119+
# New key at same-or-less indent ends current list
140120
if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk)) {
141121
if (length(mk[1]) <= base_indent) {
142-
print "__END__"
143-
in_list=0; key=""
144-
# Reprocess same line as a new key; emulate tail recursion:
145-
# Flow style?
122+
print "__END__"; in_list=0
123+
# Re-handle this line as new start (flow or block)
146124
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
147-
key = trim(mm[2]); print "__KEY__" key
148-
s = mm[3]; n = split(s, a, /,/)
149-
for (i=1;i<=n;i++){ val=trim(a[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
125+
key=mm[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
126+
print "__KEY__" key
127+
s=mm[3]; n=split(s, a2, /,/)
128+
for (i=1; i<=n; i++) {
129+
val=a2[i]; sub(/^[ \t]+/, "", val); sub(/[ \t]+$/, "", val)
130+
len=length(val)
131+
if (len>=2) {
132+
first=substr(val,1,1); last=substr(val,len,1)
133+
if ((first=="\"" && last=="\"") || (first=="'" && last=="'")) {
134+
val=substr(val,2,len-2)
135+
}
136+
}
137+
if (val!="") print "__ITEM__" val
138+
}
150139
print "__END__"
151140
next
152141
}
153-
# Block style start
154142
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
155-
key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
143+
key=mm2[2]; sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
144+
base_indent=length(mm2[1]); in_list=1; print "__KEY__" key
156145
next
157146
}
158147
}
159148
}
149+
160150
# List item: - value
161151
if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
162-
item = trim(mi[1]); item = dequote(item)
152+
item=mi[1]; sub(/^[ \t]+/, "", item); sub(/[ \t]+$/, "", item)
153+
len=length(item)
154+
if (len>=2) {
155+
first=substr(item,1,1); last=substr(item,len,1)
156+
if ((first=="\"" && last=="\"") || (first=="'" && last=="'")) {
157+
item=substr(item,2,len-2)
158+
}
159+
}
163160
if (item!="") print "__ITEM__" item
164161
next
165162
}
166-
# Otherwise ignore until dedent/new key
167-
next
168163
}
169-
170-
# Lines outside list we do not handle (scalars, maps)
171-
next
172164
}
173165
END { if (in_list==1) print "__END__" }
174166
' "$file"
175167
)"
176168

177-
# ---- consume token stream and create arrays ----
178-
# We gather items per key, then assign a real shell array with a unique sanitized name.
169+
# ---- consume tokens and create arrays ----
179170
if [ -z "$awk_out" ]; then
180171
echo "No top-level list categories found in: $file" >&2
181172
return 0
182173
fi
183174

184-
# read line-by-line portably (bash & zsh)
185175
local cur_key="" cur_safe="" base="" n=1
186-
# temp items store (as a plain list we will quote on assignment)
187-
if [ "$is_zsh" -eq 1 ]; then
188-
typeset -ga __items
189-
fi
176+
if [ "$is_zsh" -eq 1 ]; then typeset -ga __items; fi
190177
__items=()
191178

192179
_finalize_current() {
193180
[ -z "$cur_key" ] && return 0
194-
# unique, sanitized var name (with prefix)
195181
cur_safe="$(_sanitize "$cur_key")"
196182
cur_safe="${prefix}${cur_safe}"
197183
base="$cur_safe"
@@ -207,20 +193,14 @@ collect_yaml_lists() {
207193
cur_key=""
208194
}
209195

210-
# iterate tokens
211196
while IFS= read -r line; do
212197
case "$line" in
213198
__KEY__*)
214-
# finalize previous (if any) then start new
215199
_finalize_current
216200
cur_key="${line#__KEY__}"
217201
;;
218-
__ITEM__*)
219-
__items+=("${line#__ITEM__}")
220-
;;
221-
__END__)
222-
_finalize_current
223-
;;
202+
__ITEM__*) __items+=("${line#__ITEM__}") ;;
203+
__END__) _finalize_current ;;
224204
esac
225205
done <<EOF
226206
$awk_out

0 commit comments

Comments
 (0)