|
1 | 1 | #!/bin/bash |
2 | | -# Arc ClickBench Complete Benchmark Script (Go Binary Version) |
3 | | -set -e |
4 | | - |
5 | | -# ============================================================ |
6 | | -# 1. INSTALL ARC FROM .DEB PACKAGE |
7 | | -# ============================================================ |
8 | | -echo "Installing Arc from .deb package..." |
9 | | - |
10 | | -# Fetch latest Arc version from GitHub releases |
11 | | -echo "Fetching latest Arc version..." |
12 | | -ARC_VERSION=$(curl -s https://api.github.com/repos/Basekick-Labs/arc/releases/latest | grep -oP '"tag_name": "v\K[^"]+') |
13 | | -if [ -z "$ARC_VERSION" ]; then |
14 | | - echo "Error: Could not fetch latest Arc version from GitHub" |
15 | | - exit 1 |
16 | | -fi |
17 | | -echo "Latest Arc version: $ARC_VERSION" |
18 | | - |
19 | | -ARCH=$(uname -m) |
20 | | -if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then |
21 | | - DEB_URL="https://github.com/Basekick-Labs/arc/releases/download/v${ARC_VERSION}/arc_${ARC_VERSION}_arm64.deb" |
22 | | - DEB_FILE="arc_${ARC_VERSION}_arm64.deb" |
23 | | -else |
24 | | - DEB_URL="https://github.com/Basekick-Labs/arc/releases/download/v${ARC_VERSION}/arc_${ARC_VERSION}_amd64.deb" |
25 | | - DEB_FILE="arc_${ARC_VERSION}_amd64.deb" |
26 | | -fi |
27 | | - |
28 | | -echo "Detected architecture: $ARCH -> $DEB_FILE" |
29 | | - |
30 | | -if [ ! -f "$DEB_FILE" ]; then |
31 | | - wget -q "$DEB_URL" -O "$DEB_FILE" |
32 | | -fi |
33 | | - |
34 | | -sudo dpkg -i "$DEB_FILE" || sudo apt-get install -f -y |
35 | | -echo "[OK] Arc installed" |
36 | | - |
37 | | -# ============================================================ |
38 | | -# 2. PRINT SYSTEM INFO (Arc defaults) |
39 | | -# ============================================================ |
40 | | -CORES=$(nproc) |
41 | | -TOTAL_MEM_KB=$(grep MemTotal /proc/meminfo | awk '{print $2}') |
42 | | -TOTAL_MEM_GB=$((TOTAL_MEM_KB / 1024 / 1024)) |
43 | | -MEM_LIMIT_GB=$((TOTAL_MEM_GB * 80 / 100)) # 80% of system RAM |
44 | | - |
45 | | -echo "" |
46 | | -echo "System Configuration:" |
47 | | -echo " CPU cores: $CORES" |
48 | | -echo " Connections: $((CORES * 2)) (cores × 2)" |
49 | | -echo " Threads: $CORES (same as cores)" |
50 | | -echo " Memory limit: ${MEM_LIMIT_GB}GB (80% of ${TOTAL_MEM_GB}GB total)" |
51 | | -echo "" |
52 | | - |
53 | | -# ============================================================ |
54 | | -# 3. START ARC AND CAPTURE TOKEN FROM LOGS |
55 | | -# ============================================================ |
56 | | -echo "Starting Arc service..." |
57 | | - |
58 | | -# Check if we already have a valid token from a previous run |
59 | | -if [ -f "arc_token.txt" ]; then |
60 | | - EXISTING_TOKEN=$(cat arc_token.txt) |
61 | | - echo "Found existing token file, will verify after Arc starts..." |
62 | | -fi |
63 | | - |
64 | | -sudo systemctl start arc |
65 | | - |
66 | | -# Wait for Arc to be ready |
67 | | -echo "Waiting for Arc to be ready..." |
68 | | -for i in {1..30}; do |
69 | | - if curl -sf http://localhost:8000/health > /dev/null 2>&1; then |
70 | | - echo "[OK] Arc is ready!" |
71 | | - break |
72 | | - fi |
73 | | - if [ $i -eq 30 ]; then |
74 | | - echo "Error: Arc failed to start within 30 seconds" |
75 | | - sudo journalctl -u arc --no-pager | tail -50 |
76 | | - exit 1 |
77 | | - fi |
78 | | - sleep 1 |
79 | | -done |
80 | | - |
81 | | -# Try to get token - either from existing file or from logs (first run) |
82 | | -ARC_TOKEN="" |
83 | | - |
84 | | -# First, check if existing token works |
85 | | -if [ -n "$EXISTING_TOKEN" ]; then |
86 | | - if curl -sf http://localhost:8000/health -H "x-api-key: $EXISTING_TOKEN" > /dev/null 2>&1; then |
87 | | - ARC_TOKEN="$EXISTING_TOKEN" |
88 | | - echo "[OK] Using existing token from arc_token.txt" |
89 | | - else |
90 | | - echo "Existing token invalid, looking for new token in logs..." |
91 | | - fi |
92 | | -fi |
93 | | - |
94 | | -# If no valid token yet, try to extract from logs (first run scenario) |
95 | | -if [ -z "$ARC_TOKEN" ]; then |
96 | | - ARC_TOKEN=$(sudo journalctl -u arc --no-pager | grep -oP '(?:Initial admin API token|Admin API token): \K[^\s]+' | head -1) |
97 | | - if [ -n "$ARC_TOKEN" ]; then |
98 | | - echo "[OK] Captured new token from logs" |
99 | | - echo "$ARC_TOKEN" > arc_token.txt |
100 | | - else |
101 | | - echo "Error: Could not find or validate API token" |
102 | | - echo "If this is not the first run, Arc's database may need to be reset:" |
103 | | - echo " sudo rm -rf /var/lib/arc/data/arc.db" |
104 | | - exit 1 |
105 | | - fi |
106 | | -fi |
107 | | - |
108 | | -echo "Token: ${ARC_TOKEN:0:20}..." |
109 | | - |
110 | | -# ============================================================ |
111 | | -# 4. DOWNLOAD DATASET |
112 | | -# ============================================================ |
113 | | -DATASET_FILE="hits.parquet" |
114 | | -DATASET_URL="https://datasets.clickhouse.com/hits_compatible/hits.parquet" |
115 | | -EXPECTED_SIZE=14779976446 |
116 | | - |
117 | | -if [ -f "$DATASET_FILE" ]; then |
118 | | - CURRENT_SIZE=$(stat -c%s "$DATASET_FILE" 2>/dev/null || stat -f%z "$DATASET_FILE" 2>/dev/null) |
119 | | - if [ "$CURRENT_SIZE" -eq "$EXPECTED_SIZE" ]; then |
120 | | - echo "[OK] Dataset already downloaded (14GB)" |
121 | | - else |
122 | | - echo "Re-downloading dataset (size mismatch)..." |
123 | | - rm -f "$DATASET_FILE" |
124 | | - wget --continue --progress=dot:giga "$DATASET_URL" |
125 | | - fi |
126 | | -else |
127 | | - echo "Downloading ClickBench dataset (14GB)..." |
128 | | - wget --continue --progress=dot:giga "$DATASET_URL" |
129 | | -fi |
130 | | - |
131 | | -# ============================================================ |
132 | | -# 5. LOAD DATA INTO ARC |
133 | | -# ============================================================ |
134 | | -echo "Loading data into Arc..." |
135 | | - |
136 | | -# Determine Arc's data directory (default: /var/lib/arc/data) |
137 | | -ARC_DATA_DIR="/var/lib/arc/data" |
138 | | -TARGET_DIR="$ARC_DATA_DIR/clickbench/hits" |
139 | | -TARGET_FILE="$TARGET_DIR/hits.parquet" |
140 | | - |
141 | | -sudo mkdir -p "$TARGET_DIR" |
142 | | - |
143 | | -if [ -f "$TARGET_FILE" ]; then |
144 | | - SOURCE_SIZE=$(stat -c%s "$DATASET_FILE" 2>/dev/null || stat -f%z "$DATASET_FILE" 2>/dev/null) |
145 | | - TARGET_SIZE=$(stat -c%s "$TARGET_FILE" 2>/dev/null || stat -f%z "$TARGET_FILE" 2>/dev/null) |
146 | | - if [ "$SOURCE_SIZE" -eq "$TARGET_SIZE" ]; then |
147 | | - echo "[OK] Data already loaded" |
148 | | - else |
149 | | - echo "Reloading data (size mismatch)..." |
150 | | - sudo cp "$DATASET_FILE" "$TARGET_FILE" |
151 | | - fi |
152 | | -else |
153 | | - sudo cp "$DATASET_FILE" "$TARGET_FILE" |
154 | | - echo "[OK] Data loaded to $TARGET_FILE" |
155 | | -fi |
156 | | - |
157 | | -# ============================================================ |
158 | | -# 6. SET ENVIRONMENT AND RUN BENCHMARK |
159 | | -# ============================================================ |
160 | | -export ARC_URL="http://localhost:8000" |
161 | | -export ARC_API_KEY="$ARC_TOKEN" |
162 | | -export DATABASE="clickbench" |
163 | | -export TABLE="hits" |
164 | | - |
165 | | -echo "" |
166 | | -echo "Running ClickBench queries (true cold runs)..." |
167 | | -echo "================================================" |
168 | | -./run.sh 2>&1 | tee log.txt |
169 | | - |
170 | | -# ============================================================ |
171 | | -# 7. STOP ARC AND FORMAT RESULTS |
172 | | -# ============================================================ |
173 | | -echo "Stopping Arc..." |
174 | | -sudo systemctl stop arc |
175 | | - |
176 | | -# Format results as proper JSON array |
177 | | -cat log.txt | grep -oE '^[0-9]+\.[0-9]+|^null' | \ |
178 | | - awk '{ |
179 | | - if (NR % 3 == 1) printf "["; |
180 | | - printf "%s", $1; |
181 | | - if (NR % 3 == 0) print "],"; |
182 | | - else printf ", "; |
183 | | - }' > results.txt |
184 | | - |
185 | | -echo "" |
186 | | -echo "[OK] Benchmark complete!" |
187 | | -echo "================================================" |
188 | | -echo "Load time: 0" |
189 | | -echo "Data size: $EXPECTED_SIZE" |
190 | | -cat results.txt |
191 | | -echo "================================================" |
192 | | - |
193 | | -# ============================================================ |
194 | | -# 8. CLEANUP |
195 | | -# ============================================================ |
196 | | -echo "Cleaning up..." |
197 | | - |
198 | | -# Uninstall Arc package |
199 | | -sudo dpkg -r arc || true |
200 | | - |
201 | | -# Remove Arc data directory |
202 | | -sudo rm -rf /var/lib/arc |
203 | | - |
204 | | -echo "[OK] Cleanup complete" |
| 2 | +# Thin shim — actual flow is in lib/benchmark-common.sh. |
| 3 | +export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single" |
| 4 | +export BENCH_DURABLE=yes |
| 5 | +exec ../lib/benchmark-common.sh |
0 commit comments