Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/storage/index_hnsw/hnsw_algo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,17 @@ namespace cubhnsw
// Computes the distance between a caller-supplied query vector and the data
// stored for `slot`, which is fetched through m_storage with lock_mode::shared.
// NOTE(review): this span comes from a diff rendering whose +/- markers were
// stripped. The first `return` looks like the pre-change row and the node_type
// rows after it like its replacement; as literally written, everything after
// the first `return` is unreachable — confirm which variant is intended.
inline distance_t compute_distance_from_query_ (const float *query, const slot_id_t &slot) const
{
pinned_t vec_blk = m_storage->get_vector_by_slot_id (slot, lock_mode::shared);
return compute_distance_ (query, reinterpret_cast<const float *> (vec_blk->data));
node_type node = node_type (vec_blk->data);
return compute_distance_ (query, node.get_vector());
}

inline distance_t compute_distance_between (const slot_id_t &a, const slot_id_t &b) const
{
auto get_vec = [&] (const slot_id_t &slot) -> const float *
{
pinned_t vec_blk = m_storage->get_vector_by_slot_id (slot, lock_mode::shared);
return reinterpret_cast<const float *> (vec_blk->data);
node_type node = node_type (vec_blk->data);
return node.get_vector();
};

return compute_distance_ (get_vec (a), get_vec (b));
Expand Down
46 changes: 31 additions & 15 deletions src/storage/index_hnsw/hnsw_graph_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,13 @@ namespace cubhnsw
{
return tape_;
}
// Returns the address offset_vector bytes past tape_, i.e. the position that
// get_vector() reads from and set_vector() copies the float payload into.
byte_t *vector_tape () const noexcept
{
return tape_ + offset_vector;
}
// Returns the address inside the record where the neighbor data begins.
// NOTE(review): two returns are present — a fixed offset (offset_neighbors)
// and an offset read back from the record (get_neighbors_offset ()). They look
// like the before/after rows of a diff; as written only the first executes.
byte_t *neighbors_tape() const noexcept
{
return tape_ + offset_neighbors;
return tape_ + get_neighbors_offset ();
}
explicit operator bool() const noexcept
{
Expand All @@ -180,9 +184,10 @@ namespace cubhnsw
node_t &operator= (node_t &&) noexcept = default;

// Byte offsets of the fields inside a node record, relative to tape_.
// NOTE(review): offset_level is declared twice below (once after
// offset_vec_slot, once directly after offset_key). This looks like the
// before/after rows of a diff with the markers stripped; a real header can
// contain only one of the two layouts.
static constexpr std::size_t offset_key = 0;
static constexpr std::size_t offset_vec_slot = sizeof (OID);
static constexpr std::size_t offset_level = offset_vec_slot + sizeof (slot_id_t);
static constexpr std::size_t offset_neighbors = offset_level + sizeof (level_t);
// NOTE(review): the key is written as an OID at tape_ (misaligned_store<OID>),
// yet this skips sizeof (slot_id_t). That is only correct if OID and slot_id_t
// have the same size — confirm, or use sizeof (OID).
static constexpr std::size_t offset_level = offset_key + sizeof (slot_id_t);
// Position of the std::size_t that get/set_neighbors_offset load and store.
static constexpr std::size_t offset_neighbors_offset = offset_level + sizeof (level_t);
// Position returned by vector_tape(); the fixed-size header ends here.
static constexpr std::size_t offset_vector = offset_neighbors_offset + sizeof (std::size_t);
static constexpr std::size_t offset_header_end = offset_vector;

OID get_key() const noexcept
{
Expand All @@ -193,33 +198,44 @@ namespace cubhnsw
return misaligned_store<OID> (tape_, v);
}

slot_id_t get_vec_slot() const noexcept
// Loads the level_t value stored at offset_level in the record via misaligned_load.
level_t get_level() const noexcept
{
return misaligned_load<level_t> (tape_ + offset_level);
}
// Stores `v` as the level_t value at offset_level in the record via misaligned_store.
void set_level (level_t v) noexcept
{
return misaligned_store<level_t> (tape_ + offset_level, v);
}
// Returns a float pointer to the bytes at vector_tape().
// NOTE(review): the first `return` loads a slot_id_t from offset_vec_slot and
// does not match this function's return type; it looks like a leftover
// pre-change diff row (the body of the old get_vec_slot()).
// NOTE(review): the other header fields are accessed with misaligned_load/store,
// so tape_ + offset_vector may not be float-aligned — confirm callers tolerate
// unaligned float reads through this pointer.
const float *get_vector() const noexcept
{
return misaligned_load<slot_id_t> (tape_ + offset_vec_slot);
return reinterpret_cast<const float *> (vector_tape());
}
void set_vec_slot (slot_id_t v) noexcept
// Copies `dim` floats from `v` into the record at vector_tape() and records
// the neighbor offset as offset_header_end + sizeof (float) * dim.
// v:   source floats; at least `dim` elements are read.
// dim: number of floats to copy.
// NOTE(review): the first `return` (a slot_id_t store to offset_vec_slot) looks
// like a leftover pre-change diff row; as written it makes the rest of the
// body unreachable.
// NOTE(review): no capacity check is done here — presumably the caller sized
// the record buffer for the header plus dim floats; verify against the caller.
void set_vector (const float *v, std::size_t dim) noexcept
{
return misaligned_store<slot_id_t> (tape_ + offset_vec_slot, v);
std::size_t offset = offset_header_end + sizeof (float) * dim;
set_neighbors_offset (offset);
std::memcpy (vector_tape(), v, dim * sizeof (float));
}

level_t get_level() const noexcept
// Returns the std::size_t stored at offset_neighbors_offset in the record.
// NOTE(review): the first `return` loads a level_t from offset_level and looks
// like a leftover pre-change diff row (the old get_level() body); as written
// it is the one that executes.
std::size_t get_neighbors_offset () const noexcept
{
return misaligned_load<level_t> (tape_ + offset_level);
return misaligned_load<std::size_t> (tape_ + offset_neighbors_offset);
}
void set_level (level_t v) noexcept

// Stores `offset` as a std::size_t at offset_neighbors_offset in the record.
// NOTE(review): the first `return` references `v`, which is not a parameter of
// this function; it looks like a leftover pre-change diff row (the old
// set_level() body).
void set_neighbors_offset (std::size_t offset) noexcept
{
return misaligned_store<level_t> (tape_ + offset_level, v);
return misaligned_store<std::size_t> (tape_ + offset_neighbors_offset, offset);
}

static constexpr std::size_t get_size() noexcept
// Size in bytes of the node head: the fixed header up to offset_vector plus
// `dim` floats.
// NOTE(review): two returns are present; `return offset_neighbors;` looks like
// the pre-change diff row and is the one that executes as written.
// NOTE(review): `neighbors_count` is not used by either return — confirm
// whether it is meant to contribute to the size.
static constexpr std::size_t get_size (std::size_t dim, std::size_t neighbors_count) noexcept
{
return offset_neighbors;
return offset_vector + sizeof (float) * dim;
}

// Builds a human-readable one-line description of the node header fields.
// NOTE(review): two stream statements are present; the first (with vec_slot)
// looks like the pre-change diff row. As written both run and their output is
// concatenated.
// NOTE(review): std::stringstream/std::string can throw on allocation failure;
// inside a noexcept function that would call std::terminate.
std::string dump() const noexcept
{
std::stringstream ss;
ss << "key: " << dump_oid (get_key()) << ", vec_slot: " << dump_slot (get_vec_slot()) << ", level: " << get_level();
ss << "key: " << dump_oid (get_key()) << ", level: " << get_level() << ", neighbors_offset: " << get_neighbors_offset();
return ss.str();
}

Expand Down
8 changes: 4 additions & 4 deletions src/storage/index_hnsw/hnsw_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ namespace cubhnsw
return level > 0 ? node_neighbors_bytes_ (level - 1) : 0;
}

inline std::size_t node_bytes_ (level_t level) const noexcept
// Total bytes for a node at `level`: the head size plus
// node_neighbors_bytes_ (level).
// NOTE(review): two returns are present; the zero-argument node_head_bytes_()
// call looks like the pre-change diff row and is the one that executes as
// written.
inline std::size_t node_bytes_ (level_t level, std::size_t dim, std::size_t neighbors_count) const noexcept
{
return node_head_bytes_() + node_neighbors_bytes_ (level);
return node_head_bytes_ (dim, neighbors_count) + node_neighbors_bytes_ (level);
}

inline std::size_t node_head_bytes_() const noexcept
// Bytes occupied by the node head, delegated to node_t<Traits>::get_size.
// NOTE(review): two returns are present; the zero-argument get_size() call
// looks like the pre-change diff row and is the one that executes as written.
inline std::size_t node_head_bytes_ (std::size_t dim, std::size_t neighbors_count) const noexcept
{
return node_t<Traits>::get_size();
return node_t<Traits>::get_size (dim, neighbors_count);
}

protected:
Expand Down
47 changes: 3 additions & 44 deletions src/storage/index_hnsw/hnsw_storage_disk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ namespace cubhnsw
m_vfid = giid.vfid;
m_root_vpid = VPID { giid.root_pageid, giid.vfid.volid };
m_last_node_vpid = m_root_vpid;

m_vec_pool_vfid = VFID_INITIALIZER;
m_last_vec_vpid = VPID_INITIALIZER;
}

disk_storage::~disk_storage ()
Expand All @@ -57,9 +54,6 @@ namespace cubhnsw
{
root_disk_t<disk_traits_t> root { reinterpret_cast<byte_t *> (root_block) };

(void) create_continous_file (m_thread_p, m_vec_pool_vfid, m_last_vec_vpid);
root.set_vec_pool_vfid (m_vec_pool_vfid);

root_size = root.get_size();
}

Expand Down Expand Up @@ -141,18 +135,8 @@ namespace cubhnsw
disk_storage::slot_id_t
disk_storage::add_node (const OID &key, const float *vector, const level_t &level)
{
// insert vector first
slot_id_t vec_slot = add_vector (key, vector);
#if 0
if (vec_slot.pageid == -1)
{
assert (false);
return slot_id_t { -1, -1, -1 };
}
#endif

// insert node
std::size_t bytes = this->node_bytes_ (level);
std::size_t bytes = this->node_bytes_ (level, get_dimension(), get_connectivity());
page_handle page_ptr = get_page_to_insert (m_vfid, m_last_node_vpid, bytes);

RECDES recdes;
Expand All @@ -167,8 +151,8 @@ namespace cubhnsw

node_t<disk_traits_t> node { reinterpret_cast<byte_t *> (rec_buf) };
node.set_key (key);
node.set_vec_slot (vec_slot);
node.set_level (level);
node.set_vector (vector, get_dimension());

PGSLOTID slot_id;

Expand Down Expand Up @@ -258,32 +242,7 @@ namespace cubhnsw
disk_storage::get_vector_by_slot_id (const slot_id_t &slot, const lock_mode &mode)
{
// get node by slot id
pinned_t node_blk = get_node_by_slot_id (slot, lock_mode::shared);
node_t<disk_traits_t> node = node_t<disk_traits_t> (node_blk.get().data);
slot_id_t vec_slot = node.get_vec_slot();

// =====================================================================

VPID vpid = { vec_slot.pageid, vec_slot.volid };

// updating vectors is not allowed
assert (mode == lock_mode::shared);

PAGE_PTR vec_page_ptr = pgbuf_fix (m_thread_p, &vpid, OLD_PAGE, PGBUF_LATCH_READ, PGBUF_UNCONDITIONAL_LATCH);
assert (vec_page_ptr != nullptr);

SPAGE_SLOT *slotp = spage_get_slot (vec_page_ptr, vec_slot.slotid);
assert (slotp != nullptr);

return make_pinned_block<disk_traits_t> (vec_slot, (std::byte *) vec_page_ptr + slotp->offset_to_record,
slotp->record_length, mode,
[this, vec_page_ptr] (auto& blk) noexcept
{
assert (blk.mode == lock_mode::shared);
pgbuf_unfix (m_thread_p, reinterpret_cast<PAGE_PTR> (vec_page_ptr));
}

);
return get_node_by_slot_id (slot, lock_mode::shared);
}

// promote lockmode from shared to exclusive
Expand Down
25 changes: 4 additions & 21 deletions src/storage/index_hnsw/hnsw_storage_disk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <unordered_map>
#include <cstring>

#include "hnsw_storage.hpp" // storage<memory_id_traits>
#include "hnsw_storage.hpp"
#include "thread_compat.hpp"

namespace cubhnsw
Expand All @@ -51,30 +51,13 @@ namespace cubhnsw

// Wraps `tape` by forwarding it to the root_t<ID_TRAITS> base; does nothing else.
explicit root_disk_t (byte_t *tape) noexcept : root_t<ID_TRAITS> (tape) {}

// Byte offsets of the disk-specific fields placed after the base root data.
// NOTE(review): the first two constants lay out a vector-pool id and bucket id
// after root_t's entry slot, while offset_begin starts at root_t::get_size().
// They look like before/after rows of a diff — confirm which layout is live.
static constexpr std::size_t offset_vec_pool_id = root_t<ID_TRAITS>::offset_entry + sizeof (slot_id_t);
static constexpr std::size_t offset_vec_bucket_id = offset_vec_pool_id + sizeof (block_group_id_t);
static constexpr std::size_t offset_begin = root_t<ID_TRAITS>::get_size();

// Returns a misaligned_ref_gt<block_group_id_t> built from the address at
// offset_vec_pool_id in the root record.
misaligned_ref_gt<block_group_id_t> get_vec_pool_vfid() const noexcept
{
return {this->tape() + offset_vec_pool_id};
}
// Stores `vfid` as a block_group_id_t at offset_vec_pool_id in the root record.
void set_vec_pool_vfid (block_group_id_t vfid) noexcept
{
return misaligned_store<block_group_id_t> (this->tape() + offset_vec_pool_id, vfid);
}

// Returns a misaligned_ref_gt<block_id_t> built from the address at
// offset_vec_bucket_id in the root record.
misaligned_ref_gt<block_id_t> get_last_vec_vpid() const noexcept
{
return {this->tape() + offset_vec_bucket_id};
}
// Stores `vpid` at offset_vec_bucket_id in the root record.
// NOTE(review): the parameter is block_id_t but the store is instantiated with
// VPID; presumably block_id_t is an alias of VPID for the disk traits — confirm,
// otherwise the two should name the same type.
void set_last_vec_bucket_vpid (block_id_t vpid) noexcept
{
return misaligned_store<VPID> (this->tape() + offset_vec_bucket_id, vpid);
}
// TODO: extract extra data from root

// Size in bytes of the root record as laid out by root_disk_t.
// NOTE(review): two returns are present; the first (base size plus a
// block_group_id_t and a block_id_t) looks like the pre-change diff row and is
// the one that executes as written.
static constexpr std::size_t get_bytes() noexcept
{
return root_t<ID_TRAITS>::get_size() + sizeof (block_group_id_t) + sizeof (block_id_t);
return root_t<ID_TRAITS>::get_size();
}
};

Expand Down
Loading