|
4 | 4 | using System; |
5 | 5 | using System.IO; |
6 | 6 | using System.Linq; |
| 7 | +using System.Text; |
7 | 8 | using System.Threading; |
8 | 9 | using System.Threading.Tasks; |
9 | 10 | using Garnet.server; |
| 11 | +using Garnet.server.BfTreeInterop; |
10 | 12 | using NUnit.Framework; |
11 | 13 | using NUnit.Framework.Legacy; |
12 | 14 | using StackExchange.Redis; |
@@ -1100,6 +1102,137 @@ public void RICheckpointAndRecoverTest() |
1100 | 1102 | } |
1101 | 1103 | } |
1102 | 1104 |
|
/// <summary>
/// [Explicit] End-to-end repro through a Garnet server: a DISK-backed range index is filled
/// with 1000 tiny records (4-byte field and 4-byte value each), which is enough to make the
/// underlying BfTree MULTI-LEVEL. The index is checkpointed with SAVE, the server is restarted
/// with recovery enabled (RecoverFromCprSnapshot), and the recovered tree is dropped when the
/// server is disposed. Dropping the recovered multi-level tree aborts the process in bf-tree's
/// <c>bftree_drop</c> (assertion failed: next_level.is_null() at mini_page_op.rs:429). The
/// panic is raised on a bf-tree internal background thread as a native abort, so
/// <c>--blame-crash</c>/createdump cannot capture it. What matters is total data volume
/// relative to the small 64 KiB cache (the multi-level split), not the record size: small
/// trees recover and drop cleanly (see <see cref="RICheckpointAndRecoverTest"/>). The same
/// crash without a server is <see cref="BfTreeRecoverFromCprSnapshotThenDrop_Crashes"/>.
/// Crashes the test host by design.
/// </summary>
[Test]
[Explicit("Crashes by design: drops a CprSnapshot-recovered multi-level BfTree (bf-tree 0.5.0 bug).")]
public void RICheckpointRecoverThenDrop_Crashes()
{
    const string indexKey = "cpindex";
    const int recordCount = 1000; // well above the multi-level threshold for these tiny records

    // Replace the fixture's server with a fresh one over an empty directory, with the range
    // index preview and AOF enabled.
    server.Dispose();
    TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true);
    server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true);
    server.Start();

    using (var seedConnection = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true)))
    {
        var seedDb = seedConnection.GetDatabase(0);

        seedDb.Execute("RI.CREATE", indexKey, "DISK", "CACHESIZE", "65536", "MINRECORD", "8");

        var written = 0;
        while (written < recordCount)
        {
            // The same zero-padded 4-character string serves as both field and value.
            var fieldAndValue = written.ToString("D4");
            seedDb.Execute("RI.SET", indexKey, fieldAndValue, fieldAndValue);
            written++;
        }

        // Sanity: data is present before checkpoint.
        var beforeCheckpoint = (string)seedDb.Execute("RI.GET", indexKey, "0000");
        ClassicAssert.AreEqual("0000", beforeCheckpoint);

        // Checkpoint: CprSnapshot of the (now multi-level) tree.
        seedDb.Execute("SAVE");
    }

    // Restart with recovery: the tree is reloaded via RecoverFromCprSnapshot.
    server.Dispose();
    server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true, tryRecover: true);
    server.Start();

    using (var verifyConnection = ConnectionMultiplexer.Connect(TestUtils.GetConfig()))
    {
        var verifyDb = verifyConnection.GetDatabase(0);
        var afterRecovery = (string)verifyDb.Execute("RI.GET", indexKey, "0000");
        ClassicAssert.AreEqual("0000", afterRecovery, "data should survive checkpoint + recovery");
    }

    // The recovered multi-level tree is now live. Disposing the server (here in TearDown via
    // RangeIndexManager.Dispose) drops it and trips the bf-tree assert, aborting the process.
}
| 1160 | + |
/// <summary>
/// Creates a disk-backed BfTree, inserts <paramref name="records"/> tiny 4-byte key/value
/// records, takes a CprSnapshot of it, then recovers a second tree from a copy of that
/// snapshot and drops the recovered tree.
/// </summary>
/// <remarks>
/// With these parameters (cache size 65536, min record size 8, key = value = i.ToString("D4"))
/// the tree becomes MULTI-LEVEL at exactly 378 records: dropping a recovered multi-level tree
/// aborts the process in bftree_drop (assertion failed: next_level.is_null() at
/// mini_page_op.rs:429, on a bf-tree background thread), while 377 records stays single-level
/// and drops cleanly. The crash is driven by total data volume vs the small cache (the
/// multi-level split), not by record size. Every Insert is asserted to succeed so a silent
/// InvalidKV (limit) failure cannot masquerade as a valid repro. The 377/378 boundary is
/// deterministic and identical on net8/net10 and Windows/Linux (the structure is fixed by the
/// native bf-tree binary + the inserted data).
/// </remarks>
/// <param name="records">Number of records to insert into the seed tree before the snapshot.</param>
private static void SeedRecoverDrop(int records)
{
    // The crashing variant aborts the process mid-test, so its temp dir is never cleaned up
    // afterward. Sweep any leftovers from prior (crashed) runs up front so they don't pile up.
    // NOTE(review): this also matches directories of a concurrently running instance of these
    // tests; acceptable for [Explicit] manual repros, but confirm before running in parallel.
    foreach (var stale in Directory.EnumerateDirectories(Path.GetTempPath(), "bftree_cprrepro_*"))
    {
        // Deliberately best-effort: a leftover that cannot be removed must not fail the test.
        try { Directory.Delete(stale, recursive: true); } catch { }
    }

    var dir = Path.Combine(Path.GetTempPath(), $"bftree_cprrepro_{Guid.NewGuid():N}");
    Directory.CreateDirectory(dir);

    try
    {
        var seedScratch = Path.Combine(dir, "seed.scratch.cpr");
        var snapshot = Path.Combine(dir, "snap.bftree");

        var seed = new BfTreeService(StorageBackendType.Disk,
            Path.Combine(dir, "seed.data.bftree"), seedScratch,
            cbSizeByte: 65536, cbMinRecordSize: 8);
        try
        {
            for (var i = 0; i < records; i++)
            {
                var bytes = Encoding.ASCII.GetBytes(i.ToString("D4"));
                ClassicAssert.AreEqual(BfTreeInsertResult.Success, seed.Insert(bytes, bytes), $"insert {i} should succeed");
            }

            seed.CprSnapshot();

            // Copy the snapshot while the seed tree is still alive, exactly as before, so the
            // recovered tree reads from its own file rather than the seed's scratch file.
            File.Copy(seedScratch, snapshot, overwrite: false);
        }
        finally
        {
            // Release the seed tree even when an insert assertion or the snapshot throws;
            // otherwise the native tree and its open files would leak past the failed test.
            seed.Dispose();
        }

        // Recover from the snapshot and drop: aborts in bftree_drop iff the tree is multi-level.
        var tree = BfTreeService.RecoverFromCprSnapshot(snapshot, Path.Combine(dir, "rec.scratch.cpr"), StorageBackendType.Disk);
        tree.Dispose();
    }
    finally
    {
        // Best-effort cleanup for every path that returns or throws normally. The crashing
        // path aborts the process before this runs and is swept by the next run's up-front
        // cleanup above. Swallowing here also keeps a cleanup failure from masking the
        // original test failure.
        try { Directory.Delete(dir, recursive: true); } catch { }
    }
}
| 1204 | + |
/// <summary>
/// [Explicit] Server-less counterpart of <see cref="RICheckpointRecoverThenDrop_Crashes"/>:
/// drives <see cref="BfTreeService"/> directly, with no Garnet server involved. 378 tiny
/// records make the tree MULTI-LEVEL; recovering that tree from a CprSnapshot and then
/// dropping it aborts the process in <c>bftree_drop</c> (assertion failed:
/// next_level.is_null() at mini_page_op.rs:429) on a bf-tree background thread. 378 is
/// exactly one above the threshold — see
/// <see cref="BfTreeRecoverFromCprSnapshotThenDrop_BelowThreshold_NoCrash"/>. Crashes the host
/// by design.
/// </summary>
[Test]
[Explicit("Crashes by design: 378 records (multi-level) recovered then dropped (bf-tree 0.5.0 bug).")]
public void BfTreeRecoverFromCprSnapshotThenDrop_Crashes()
{
    // Smallest record count at which the seeded tree becomes multi-level.
    const int multiLevelRecordCount = 378;

    SeedRecoverDrop(multiLevelRecordCount);
}
| 1220 | + |
/// <summary>
/// Control case for <see cref="BfTreeRecoverFromCprSnapshotThenDrop_Crashes"/>. Runs the
/// identical create → snapshot → recover → drop sequence, but with 377 records — one below the
/// multi-level split threshold — so the tree stays SINGLE-LEVEL, the recovered tree drops
/// cleanly, and the test passes. The record count (378 vs 377) is the only difference from
/// the crashing test, which demonstrates how sharp the structural threshold is.
/// </summary>
[Test]
[Explicit("Control: 377 records (one below the threshold, single-level) drops cleanly (no crash).")]
public void BfTreeRecoverFromCprSnapshotThenDrop_BelowThreshold_NoCrash()
{
    // Largest record count at which the seeded tree is still single-level.
    const int singleLevelRecordCount = 377;

    SeedRecoverDrop(singleLevelRecordCount);

    // Reaching this line means the drop did not abort the process.
    Assert.Pass("Below-threshold (single-level) recovered tree dropped without crashing.");
}
| 1235 | + |
1103 | 1236 | /// <summary> |
1104 | 1237 | /// Full lifecycle test: create → insert → flush to read-only → promote → mutate → |
1105 | 1238 | /// evict to disk → restore from flush → checkpoint → recover. Verifies data |
|
0 commit comments