Skip to content

Commit dd3ec95

Browse files
committed
Update awk
1 parent ea45d57 commit dd3ec95

File tree

3 files changed

+77
-106
lines changed

3 files changed

+77
-106
lines changed

install.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ case "$kernel" in
2727
fi
2828
;;
2929
*"cachyos"*)
30-
sudo pacman -S --needed --noconfirm yq
3130
base_install "nas"
3231
;;
3332
*)

scripts/base_install.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ function base_install() {
1313
local file="$DOTFILES_DIR/scripts/configs/$system.yml"
1414

1515
INFO "Collecting packages for $system ($file)"
16-
collect_yaml_lists "$file" --print
16+
collect_yaml_lists "$file"
1717

1818
local i name orig
1919
# run the most basic install first then run some required commands
2020

2121
INFO "Running base program install"
22+
_do apt_update
2223
for i in "${!categories[@]}"; do
2324
name="${categories[$i]}"
2425
orig="${orig_keys[$i]}"

scripts/read_yaml.sh

Lines changed: 75 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,9 @@
11
# collect_yaml_lists <file.yaml> [--print] [--prefix PREFIX]
2-
# Pure Bash + awk YAML reader (no yq).
3-
# Supported YAML shape at the top level:
2+
# No yq. Works on mawk/BusyBox/BSD/gawk. Supports top-level:
43
# key:
54
# - item
6-
# - item
5+
# - "item with spaces"
76
# other: [a, b, "c"]
8-
#
9-
# Creates:
10-
# categories[] : sanitized array names (with optional PREFIX)
11-
# orig_keys[] : original YAML keys (aligned with categories[])
12-
# one array per key, e.g. key "pip3" -> $pip3 (or $PREFIXpip3)
13-
#
14-
# Notes:
15-
# - This is a pragmatic parser for common dotfiles/package lists.
16-
# - It ignores nested structures under items and normalizes simple quotes.
17-
187
collect_yaml_lists() {
198
# ---- args ----
209
if [ $# -lt 1 ]; then
@@ -49,7 +38,7 @@ collect_yaml_lists() {
4938
return 1
5039
}
5140

52-
# ---- shell detection (zsh arrays are 1-based & need typeset) ----
41+
# ---- shell detection ----
5342
local is_zsh=0
5443
[ -n "${ZSH_VERSION:-}" ] && is_zsh=1
5544
if [ "$is_zsh" -eq 1 ]; then
@@ -71,7 +60,7 @@ collect_yaml_lists() {
7160
for n in "${categories[@]}"; do [ "$n" = "$1" ] && return 0; done
7261
return 1
7362
}
74-
_assign_array() { # _assign_array <varname> <elements...> (portable via printf %q + eval)
63+
_assign_array() {
7564
local v="$1"
7665
shift
7766
local assign="$v=(" x
@@ -80,118 +69,106 @@ collect_yaml_lists() {
8069
eval "$assign"
8170
}
8271

83-
# ---- parse YAML with awk -> token stream (__KEY__/__ITEM__/__END__) ----
84-
# Pragmatic parser: handles top-level "key:" + block-style list and "key: [a, b, 'c']"
85-
# Strips comments that are not inside quotes.
72+
# ---- awk tokenizer (mawk-safe: only 2-arg match, no functions) ----
73+
# Emits lines: __KEY__name / __ITEM__value / __END__
8674
local awk_out
8775
awk_out="$(
8876
awk '
89-
function trim(s){ gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
90-
function dequote(s){
91-
if (s ~ /^".*"$/) { s=substr(s,2,length(s)-2) }
92-
else if (s ~ /^'\''.*'\''$/) { s=substr(s,2,length(s)-2) }
93-
return s
94-
}
95-
function strip_comments(line, i,c,dq,sq,esc,out,len) {
96-
dq=0; sq=0; esc=0; out=""; len=length(line)
97-
for (i=1; i<=len; i++) {
98-
c=substr(line,i,1)
99-
if (esc) { out=out c; esc=0; continue }
100-
if (c=="\\") { out=out c; esc=1; continue }
101-
if (c=="\"" && !sq) { dq=!dq; out=out c; continue }
102-
if (c=="'\''" && !dq) { sq=!sq; out=out c; continue }
103-
if (c=="#" && !dq && !sq) { break } # start of comment
104-
out=out c
105-
}
106-
return out
107-
}
108-
BEGIN { in_list=0; base_indent=-1; key="" }
77+
BEGIN { in_list=0; base_indent=0 }
10978
{
110-
line = strip_comments($0)
111-
if (line ~ /^[ \t]*$/) next
112-
# Flow style: key: [a, b, "c"]
113-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, m)) {
114-
key = trim(m[2])
115-
print "__KEY__" key
116-
s = m[3]
117-
n = split(s, a, /,/)
118-
for (i=1;i<=n;i++){
119-
val=trim(a[i]); val=dequote(val)
120-
if (val!="") print "__ITEM__" val
79+
line=$0
80+
81+
# remove trailing comments only if a space/tab precedes #
82+
sub(/[ \t]#[^\n]*$/, "", line)
83+
84+
# trim both ends (no functions, do it inline)
85+
t=line; sub(/^[ \t]+/, "", t); sub(/[ \t]+$/, "", t)
86+
if (t=="") next
87+
88+
# compute indent = number of leading spaces/tabs
89+
tmp=line; match(tmp, /^[ \t]*/); indent=RLENGTH
90+
91+
# detect flow: key: [ ... ]
92+
if (line ~ /^[ \t]*[^:# \t][^:]*:[ \t]*\[/) {
93+
# key = text before first colon
94+
l2=line; sub(/^[ \t]+/, "", l2)
95+
cpos=index(l2, ":")
96+
if (cpos>0) {
97+
key=substr(l2,1,cpos-1); sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
98+
print "__KEY__" key
99+
# extract inside brackets from first "[" to last "]"
100+
bpos=index(l2, "[")
101+
s=substr(l2, bpos+1)
102+
sub(/\][ \t]*$/, "", s)
103+
# split on commas (commas inside quotes not supported)
104+
n=split(s, A, ",")
105+
for (i=1; i<=n; i++) {
106+
val=A[i]; sub(/^[ \t]+/, "", val); sub(/[ \t]+$/, "", val)
107+
# dequote simple "..." or '...'
108+
L=length(val)
109+
if (L>=2) {
110+
f=substr(val,1,1); e=substr(val,L,1)
111+
if ((f=="\"" && e=="\"") || (f=="'" && e=="'")) val=substr(val,2,L-2)
112+
}
113+
if (val!="") print "__ITEM__" val
114+
}
115+
print "__END__"
116+
in_list=0
117+
next
121118
}
122-
print "__END__"
123-
in_list=0; key=""; next
124119
}
125120
126-
# Block style start: key:
127-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, m)) {
128-
# if we were in a previous list, close it
129-
if (in_list==1) { print "__END__"; in_list=0; key="" }
130-
key = trim(m[2])
131-
base_indent = length(m[1])
132-
in_list = 1
121+
# detect block start: key:
122+
if (line ~ /^[ \t]*[^:# \t][^:]*:[ \t]*$/) {
123+
if (in_list==1) { print "__END__"; in_list=0 }
124+
l2=line; sub(/^[ \t]+/, "", l2)
125+
cpos=index(l2, ":")
126+
key=substr(l2,1,cpos-1); sub(/^[ \t]+/, "", key); sub(/[ \t]+$/, "", key)
127+
base_indent=indent
128+
in_list=1
133129
print "__KEY__" key
134130
next
135131
}
136132
137-
# If inside a list, look for items or dedent/new key
138133
if (in_list==1) {
139-
# New key at dedent/same indent ends current list
140-
if (match(line, /^([ \t]*)([^:# \t][^:]*):/, mk)) {
141-
if (length(mk[1]) <= base_indent) {
142-
print "__END__"
143-
in_list=0; key=""
144-
# Reprocess same line as a new key; emulate tail recursion:
145-
# Flow style?
146-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*\[(.*)\][ \t]*$/, mm)) {
147-
key = trim(mm[2]); print "__KEY__" key
148-
s = mm[3]; n = split(s, a, /,/)
149-
for (i=1;i<=n;i++){ val=trim(a[i]); val=dequote(val); if (val!="") print "__ITEM__" val }
150-
print "__END__"
151-
next
152-
}
153-
# Block style start
154-
if (match(line, /^([ \t]*)([^:# \t][^:]*):[ \t]*$/, mm2)) {
155-
key = trim(mm2[2]); base_indent = length(mm2[1]); in_list=1; print "__KEY__" key
156-
next
134+
# a key-like line at same-or-less indent ends the current list (the line itself is not parsed again)
135+
if (line ~ /^[ \t]*[^:# \t][^:]*:/ && indent <= base_indent) {
136+
print "__END__"
137+
in_list=0
138+
# nothing below handles this line: flow and block keys were already matched above, so a scalar key line only closes the list
139+
} else {
140+
# list item: - value
141+
if (line ~ /^[ \t]*-[ \t]*/) {
142+
item=line
143+
sub(/^[ \t]*-[ \t]*/, "", item)
144+
sub(/[ \t]+$/, "", item)
145+
L=length(item)
146+
if (L>=2) {
147+
f=substr(item,1,1); e=substr(item,L,1)
148+
if ((f=="\"" && e=="\"") || (f=="'" && e=="'")) item=substr(item,2,L-2)
157149
}
150+
if (item!="") print "__ITEM__" item
151+
next
158152
}
159153
}
160-
# List item: - value
161-
if (match(line, /^[ \t]*-[ \t]*(.+)$/, mi)) {
162-
item = trim(mi[1]); item = dequote(item)
163-
if (item!="") print "__ITEM__" item
164-
next
165-
}
166-
# Otherwise ignore until dedent/new key
167-
next
168154
}
169-
170-
# Lines outside list we do not handle (scalars, maps)
171-
next
172155
}
173156
END { if (in_list==1) print "__END__" }
174157
' "$file"
175158
)"
176159

177-
# ---- consume token stream and create arrays ----
178-
# We gather items per key, then assign a real shell array with a unique sanitized name.
160+
# ---- consume tokens and create arrays ----
179161
if [ -z "$awk_out" ]; then
180162
echo "No top-level list categories found in: $file" >&2
181163
return 0
182164
fi
183165

184-
# read line-by-line portably (bash & zsh)
185166
local cur_key="" cur_safe="" base="" n=1
186-
# temp items store (as a plain list we will quote on assignment)
187-
if [ "$is_zsh" -eq 1 ]; then
188-
typeset -ga __items
189-
fi
167+
if [ "$is_zsh" -eq 1 ]; then typeset -ga __items; fi
190168
__items=()
191169

192170
_finalize_current() {
193171
[ -z "$cur_key" ] && return 0
194-
# unique, sanitized var name (with prefix)
195172
cur_safe="$(_sanitize "$cur_key")"
196173
cur_safe="${prefix}${cur_safe}"
197174
base="$cur_safe"
@@ -207,20 +184,14 @@ collect_yaml_lists() {
207184
cur_key=""
208185
}
209186

210-
# iterate tokens
211187
while IFS= read -r line; do
212188
case "$line" in
213189
__KEY__*)
214-
# finalize previous (if any) then start new
215190
_finalize_current
216191
cur_key="${line#__KEY__}"
217192
;;
218-
__ITEM__*)
219-
__items+=("${line#__ITEM__}")
220-
;;
221-
__END__)
222-
_finalize_current
223-
;;
193+
__ITEM__*) __items+=("${line#__ITEM__}") ;;
194+
__END__) _finalize_current ;;
224195
esac
225196
done <<EOF
226197
$awk_out

0 commit comments

Comments
 (0)