docs: clarify iterator API selection and fix cargo doc warnings

ktmlm · ktmlm · commit 6899ae810b61 · 2026-03-21T09:10:46.000+08:00
- Rewrite doc comments for iter(), iter_with_options(), iter_with_range(),
  and iter_with_prefix() to clearly explain when to use each API
- Add pruning comparison table (range pruning vs bloom filter pruning)
- Update README.md API reference with the same guidance
- Fix rustdoc warnings: escape brackets in types.rs, wrap Arc<TableReader>
  in backticks in db.rs
diff --git a/README.md b/README.md
@@ -190,13 +190,34 @@ impl DB {
     pub fn write_with_options(&self, batch: WriteBatch, options: &WriteOptions) -> Result<()>;
     pub fn iter(&self) -> Result<DBIterator>;
 
-    /// Prefix-optimized iteration. Uses prefix bloom filters to skip entire SST
-    /// files that don't contain `prefix`. Supports bounds via ReadOptions.
+    /// Prefix-bounded iteration — the fastest option for prefix-scoped queries.
+    ///
+    /// Applies two levels of SST pruning:
+    ///   1. Range pruning  — skips files whose key range doesn't overlap the prefix.
+    ///   2. Bloom filter   — skips files that don't contain the prefix (finer-grained).
+    ///
+    /// Use this when all target keys share a common prefix (e.g. `b"orders:"`).
+    /// For a sub-range within the prefix, set ReadOptions bounds.
     pub fn iter_with_prefix(&self, prefix: &[u8], options: &ReadOptions) -> Result<DBIterator>;
 
-    /// Full-scan iteration with optional key-range bounds.
-    /// WARNING: Does NOT use prefix bloom filters. For prefix-scoped queries,
-    /// prefer `iter_with_prefix()` which is significantly faster.
+    /// Arbitrary key-range iteration with SST range pruning.
+    ///
+    /// Skips SST files whose `[smallest, largest]` key range does not overlap
+    /// `[lower, upper)` — but does NOT use bloom filters.
+    ///
+    /// Use this when the scan range spans multiple prefixes and cannot be
+    /// expressed as a single `iter_with_prefix()` call (e.g. `[b"m", b"z")`).
+    /// For full-database scans, prefer `iter()` / `iter_with_options()`.
+    ///
+    /// Explicit bounds are merged with any bounds in ReadOptions (tighter wins).
+    ///
+    /// ## Pruning comparison
+    ///
+    /// | Method               | SST range pruning | Bloom filter pruning |
+    /// |----------------------|:-----------------:|:--------------------:|
+    /// | `iter()`             | ✗                 | ✗                    |
+    /// | `iter_with_range()`  | ✓                 | ✗                    |
+    /// | `iter_with_prefix()` | ✓                 | ✓                    |
     pub fn iter_with_range(&self, options: &ReadOptions, lower: Option<&[u8]>, upper: Option<&[u8]>) -> Result<DBIterator>;
 
     /// RAII snapshot — automatically released on drop.
diff --git a/src/db.rs b/src/db.rs
@@ -54,7 +54,7 @@ fn pool_take() -> Option<DBIterator> {
 }
 
 /// Return a DBIterator to the global pool for reuse.
-/// The iterator's sources are cleared to release Arc<TableReader> references,
+/// The iterator's sources are cleared to release `Arc<TableReader>` references,
 /// preventing stale SST files from being kept alive by pooled iterators.
 pub fn pool_return(mut iter: DBIterator) {
     iter.reset(Vec::new(), 0);
@@ -826,28 +826,59 @@ impl DB {
         Ok(None)
     }
 
-    /// Create a forward iterator over the database.
+    /// Create a forward iterator over the entire database.
+    ///
+    /// Scans all keys in order. No SST pruning is applied.
+    /// Equivalent to `iter_with_options(&ReadOptions::default())`.
+    ///
+    /// Prefer `iter_with_prefix()` or `iter_with_range()` when only a subset of
+    /// keys is needed — both can skip irrelevant SST files.
     pub fn iter(&self) -> Result<DBIterator> {
         self.iter_with_options(&ReadOptions::default())
     }
 
-    /// Create a forward iterator with options.
+    /// Create a forward iterator over the entire database with options.
     ///
-    /// Uses streaming TableIterators for SST files (O(1 block) memory per SST)
-    /// instead of loading entire tables into memory.
+    /// Uses streaming `TableIterator`s for SST files (O(1 block) memory per SST).
+    /// No SST pruning is applied; all files are visited.
+    ///
+    /// Use `ReadOptions::iterate_lower_bound` / `iterate_upper_bound` to restrict
+    /// the key range returned, but note that SST files are still opened for the
+    /// full scan. For query-time SST pruning use `iter_with_range()` or
+    /// `iter_with_prefix()`.
     pub fn iter_with_options(&self, options: &ReadOptions) -> Result<DBIterator> {
         self.iter_with_range(options, None, None)
     }
 
-    /// Create a forward iterator that only includes sources overlapping [lower_bound, upper_bound].
-    /// SST files outside this range are skipped entirely, avoiding costly block reads.
-    /// `None` bounds mean unbounded in that direction.
+    /// Create a forward iterator for an arbitrary key range `[lower_bound, upper_bound)`.
+    ///
+    /// SST files whose key range does not overlap `[lower_bound, upper_bound)` are
+    /// skipped entirely at construction time, avoiding unnecessary block reads.
+    /// `None` means unbounded in that direction.
+    ///
+    /// # When to use
+    ///
+    /// Use this when the scan range **does not align to a single key prefix** —
+    /// for example `[b"m", b"z")` spans many prefixes and cannot be expressed as
+    /// a single `iter_with_prefix()` call.
+    ///
+    /// If your range *does* align to a prefix (e.g. all keys starting with
+    /// `b"user:"`), prefer `iter_with_prefix()`: it additionally uses bloom
+    /// filters to skip SST files that don't contain the prefix, which is more
+    /// precise than range-metadata pruning alone.
     ///
-    /// **WARNING**: Does NOT use prefix bloom filters. For prefix-scoped queries,
-    /// prefer `iter_with_prefix()` which is significantly faster.
+    /// # Pruning comparison
     ///
-    /// Bounds from `ReadOptions::iterate_lower_bound` / `iterate_upper_bound` are
-    /// also applied if set, using the tighter of the two.
+    /// | Method              | SST range pruning | Bloom filter pruning |
+    /// |---------------------|:-----------------:|:--------------------:|
+    /// | `iter()`            | ✗                 | ✗                    |
+    /// | `iter_with_range()` | ✓                 | ✗                    |
+    /// | `iter_with_prefix()`| ✓                 | ✓                    |
+    ///
+    /// # Bound merging
+    ///
+    /// Explicit `lower_bound`/`upper_bound` parameters are merged with any bounds
+    /// already set in `ReadOptions`, using the tighter of the two.
     pub fn iter_with_range(
         &self,
         options: &ReadOptions,
@@ -1017,12 +1048,27 @@ impl DB {
 
     /// Create a prefix-bounded iterator with full options support.
     ///
-    /// Uses prefix bloom filters to skip SST files that don't contain the prefix,
-    /// and stops iteration as soon as the prefix boundary is crossed.
-    /// Significantly faster than `iter_with_range()` for prefix-scoped queries.
+    /// Iterates over all keys that start with `prefix` in order.
+    ///
+    /// # When to use
+    ///
+    /// Use this whenever your query is naturally prefix-scoped — for example,
+    /// all keys under a tenant (`b"tenant_42:"`), a table (`b"orders:"`), etc.
+    /// It is the fastest iterator variant because it applies **two levels of
+    /// SST pruning**:
+    ///
+    /// 1. **Range pruning** — skips SST files whose `[smallest, largest]` key
+    ///    range does not overlap the prefix.
+    /// 2. **Bloom filter pruning** — among the remaining files, skips those
+    ///    whose per-block bloom filters report that `prefix` is absent.
+    ///
+    /// For cross-prefix ranges (e.g. `[b"m", b"z")`) use `iter_with_range()`
+    /// instead, as there is no single prefix that covers the query.
+    ///
+    /// # Sub-range within a prefix
     ///
-    /// Supports `ReadOptions::iterate_lower_bound` / `iterate_upper_bound` for
-    /// sub-range queries within a prefix, and `snapshot` for historical reads.
+    /// Set `ReadOptions::iterate_lower_bound` / `iterate_upper_bound` to further
+    /// restrict iteration to a sub-range inside the prefix.
     pub fn iter_with_prefix(&self, prefix: &[u8], options: &ReadOptions) -> Result<DBIterator> {
         let seq = options.snapshot.unwrap_or_else(|| self.current_sequence());
         self.iter_with_prefix_inner(prefix, seq, options)
diff --git a/src/types.rs b/src/types.rs
@@ -1,7 +1,7 @@
 //! Core data types: InternalKey, ValueType, SequenceNumber.
 //!
 //! Encoding follows RocksDB convention:
-//!   InternalKey = [user_key bytes][packed: seq << 8 | type]  (last 8 bytes)
+//!   InternalKey = \[user_key bytes\]\[packed: seq << 8 | type\]  (last 8 bytes)
 //!   Sort order: user_key ASC, sequence DESC, value_type DESC
 
 use std::cmp::Ordering;