Skip to content

Commit 88ff069

Browse files
RUBY-3521 Add binary vector support (#344)
1 parent fac91a9 commit 88ff069

File tree

16 files changed

+671
-8
lines changed

16 files changed

+671
-8
lines changed

.evergreen/config.yml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,14 @@ axes:
328328
display_name: 7.0
329329
variables:
330330
WITH_ACTIVE_SUPPORT: "~> 7.0.0"
331+
- id: "7.1"
332+
display_name: 7.1
333+
variables:
334+
WITH_ACTIVE_SUPPORT: "~> 7.1.0"
335+
- id: "8.0"
336+
display_name: 8.0
337+
variables:
338+
WITH_ACTIVE_SUPPORT: "~> 8.0.0"
331339

332340
- id: "compact"
333341
display_name: GC.compact
@@ -365,9 +373,18 @@ buildvariants:
365373

366374
- matrix_name: "activesupport-7"
367375
matrix_spec:
368-
ruby: ["ruby-3.3", "ruby-2.7"]
376+
ruby: ruby-3.3
377+
all-os: ubuntu2004
378+
as: [ '7.0', '7.1' ]
379+
display_name: "AS ${as} ${ruby}, ${all-os}"
380+
tasks:
381+
- name: "test"
382+
383+
- matrix_name: "activesupport-8"
384+
matrix_spec:
385+
ruby: ruby-3.3
369386
all-os: ubuntu2004
370-
as: '7.0'
387+
as: '8.0'
371388
display_name: "AS ${as} ${ruby}, ${all-os}"
372389
tasks:
373390
- name: "test"

.evergreen/config/axes.yml.erb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ axes:
6060
display_name: 7.0
6161
variables:
6262
WITH_ACTIVE_SUPPORT: "~> 7.0.0"
63+
- id: "7.1"
64+
display_name: 7.1
65+
variables:
66+
WITH_ACTIVE_SUPPORT: "~> 7.1.0"
67+
- id: "8.0"
68+
display_name: 8.0
69+
variables:
70+
WITH_ACTIVE_SUPPORT: "~> 8.0.0"
6371

6472
- id: "compact"
6573
display_name: GC.compact

.evergreen/config/variants.yml.erb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,18 @@ buildvariants:
2222

2323
- matrix_name: "activesupport-7"
2424
matrix_spec:
25-
ruby: <%= sample_mri_rubies %>
25+
ruby: <%= latest_mri_ruby %>
26+
all-os: ubuntu2004
27+
as: [ '7.0', '7.1' ]
28+
display_name: "AS ${as} ${ruby}, ${all-os}"
29+
tasks:
30+
- name: "test"
31+
32+
- matrix_name: "activesupport-8"
33+
matrix_spec:
34+
ruby: <%= latest_mri_ruby %>
2635
all-os: ubuntu2004
27-
as: '7.0'
36+
as: '8.0'
2837
display_name: "AS ${as} ${ruby}, ${all-os}"
2938
tasks:
3039
- name: "test"

Gemfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ group :development, :test do
1515
if ENV['WITH_ACTIVE_SUPPORT'] =~ /[0-9]/ && ENV['WITH_ACTIVE_SUPPORT'] != '0'
1616
gem 'activesupport', ENV['WITH_ACTIVE_SUPPORT']
1717
else
18-
gem 'activesupport', '<7.1'
18+
gem 'activesupport', '<8.1'
1919
end
20+
gem 'concurrent-ruby', '1.3.4'
2021
gem 'ruby-prof', platforms: :mri
2122

2223
gem 'byebug', platforms: :mri

bson.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Gem::Specification.new do |s|
3737
if RUBY_VERSION > '3.2.99'
3838
s.add_dependency 'base64'
3939
s.add_dependency 'bigdecimal'
40+
s.add_dependency 'ostruct'
4041
end
4142

4243
s.test_files = Dir.glob('spec/**/*')

lib/bson.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def self.ObjectId(string)
9595
require "bson/timestamp"
9696
require "bson/true_class"
9797
require "bson/undefined"
98+
require "bson/vector"
9899
require "bson/version"
99100

100101
# If we are using JRuby, attempt to load the Java extensions, if we are using

lib/bson/binary.rb

Lines changed: 125 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class Binary
5050
ciphertext: 6.chr,
5151
column: 7.chr,
5252
sensitive: 8.chr,
53+
vector: 9.chr,
5354
user: 128.chr,
5455
}.freeze
5556

@@ -61,6 +62,16 @@ class Binary
6162
# @since 2.0.0
6263
TYPES = SUBTYPES.invert.freeze
6364

65+
# Types of vector data.
#
# Maps each supported vector element type to the one-byte dtype code that
# is written as the first byte of a vector binary payload.
VECTOR_DATA_TYPES = {
  int8: 0x03,
  float32: 0x27,
  packed_bit: 0x10
}.freeze

# Reverse lookup from dtype code to element type symbol.
#
# @api private
VECTOR_DATA_TYPES_INVERSE = VECTOR_DATA_TYPES.invert.freeze
74+
6475
# @return [ String ] The raw binary data.
6576
#
6677
# The string is always stored in BINARY encoding.
@@ -89,6 +100,7 @@ def ==(other)
89100

90101
type == other.type && data == other.data
91102
end
103+
92104
alias eql? ==
93105

94106
# Compare this binary object to another object. The two objects must have
@@ -113,7 +125,7 @@ def <=>(other)
113125
#
114126
# @since 2.3.1
115127
def hash
116-
[ data, type ].hash
128+
[data, type].hash
117129
end
118130

119131
# Return a representation of the object for use in
@@ -146,6 +158,26 @@ def as_extended_json(**options)
146158
end
147159
end
148160

161+
# Decode the binary data as a vector data type.
#
# The payload is read as a two-byte header (dtype code, then padding)
# followed by the packed elements. float32 elements are decoded as
# little-endian regardless of the host byte order.
#
# @return [ BSON::Vector ] The decoded vector data.
#
# @raise [ BSON::Error ] If the binary subtype is not :vector.
# @raise [ ArgumentError ] If the header is truncated or names an
#   unsupported dtype.
def as_vector
  raise BSON::Error, "Cannot decode subtype #{type} as vector" unless type == :vector

  dtype_value, padding = data[0..1].unpack('CC')
  dtype = VECTOR_DATA_TYPES_INVERSE[dtype_value]
  raise ArgumentError, "Unsupported vector type: #{dtype_value}" unless dtype
  # A one-byte payload has a dtype but no padding byte; fail explicitly
  # instead of calling unpack on nil below.
  raise ArgumentError, 'Vector data is missing the padding byte' unless padding

  format = case dtype
           when :int8 then 'c*'
           # 'e' is little-endian single precision; 'f' would use host order.
           when :float32 then 'e*'
           when :packed_bit then 'C*'
           else
             raise ArgumentError, "Unsupported type: #{dtype}"
           end
  BSON::Vector.new(data[2..-1].unpack(format), dtype, padding)
end
180+
149181
# Instantiate the new binary object.
150182
#
151183
# This method accepts a string in any encoding; however, if a string is
@@ -368,8 +400,97 @@ def self.from_python_legacy_uuid(uuid_binary)
368400
new(uuid_binary, :uuid_old)
369401
end
370402

403+
# Constructs a new binary object from a binary vector.
#
# The payload is written as a two-byte header (dtype code, then padding)
# followed by the packed elements. float32 elements are encoded as
# little-endian regardless of the host byte order.
#
# @param [ BSON::Vector | Array ] vector The vector data.
# @param [ Symbol | nil ] dtype The vector data type, must be nil if vector is a BSON::Vector.
# @param [ Integer ] padding The number of bits in the final byte that are to
#   be ignored when a vector element's size is less than a byte. Must be 0 if vector is a BSON::Vector.
# @param [ Boolean ] validate_vector_data Whether to validate the vector data.
#
# @return [ BSON::Binary ] The binary object.
#
# @raise [ ArgumentError ] If the arguments are rejected by validation or
#   the dtype has no packing format.
def self.from_vector(vector, dtype = nil, padding = 0, validate_vector_data: false)
  data, dtype, padding = extract_args_for_vector(vector, dtype, padding)
  validate_args_for_vector!(data, dtype, padding)

  format = case dtype
           when :int8 then 'c*'
           # 'e' is little-endian single precision; 'f' would use host order.
           when :float32 then 'e*'
           when :packed_bit then 'C*'
           else raise ArgumentError, "Unsupported type: #{dtype}"
           end
  validate_vector_data!(data, dtype) if validate_vector_data

  header = [ VECTOR_DATA_TYPES[dtype], padding ].pack('CC')
  new(header.concat(data.pack(format)), :vector)
end
429+
371430
private
372431

432+
# Normalizes the arguments for a binary vector into a data/dtype/padding
# triple.
#
# A plain array is passed through together with the given dtype and
# padding. A BSON::Vector supplies all three values itself, so dtype and
# padding must be left at their defaults in that case.
#
# @param [ BSON::Vector | Array ] vector The vector data.
# @param [ ::Symbol | nil ] dtype The vector data type, must be nil if vector is a BSON::Vector.
# @param [ Integer ] padding The padding. Must be 0 if vector is a BSON::Vector.
#
# @return [ Array ] The extracted data, dtype, and padding.
#
# @raise [ ArgumentError ] If dtype or a non-zero padding accompanies a
#   BSON::Vector.
def self.extract_args_for_vector(vector, dtype, padding)
  return [ vector, dtype, padding ] unless vector.is_a?(BSON::Vector)

  if dtype || padding != 0
    raise ArgumentError, 'Do not specify dtype and padding if the first argument is BSON::Vector'
  end

  [ vector.data, vector.dtype, vector.padding ]
end
453+
private_class_method :extract_args_for_vector
454+
455+
# Validate the arguments for a binary vector.
#
# @param [ Array ] data The vector data.
# @param [ ::Symbol ] dtype The vector data type.
# @param [ Integer ] padding The padding. Must be 0 for :int8 and :float32;
#   for :packed_bit it must be within 0..7, and 0 when data is empty.
#
# @raise [ ArgumentError ] If the arguments are invalid.
def self.validate_args_for_vector!(data, dtype, padding)
  raise ArgumentError, "Unknown dtype #{dtype}" unless VECTOR_DATA_TYPES.key?(dtype)

  if %i[int8 float32].include?(dtype)
    raise ArgumentError, 'Padding applies only to packed_bit' if padding != 0
  elsif !padding.is_a?(Integer)
    # Without this check nil would raise NoMethodError below and a Float
    # would pass the range checks unvalidated.
    raise ArgumentError, "Padding must be an integer, got #{padding.inspect}"
  elsif padding.positive? && data.empty?
    raise ArgumentError, 'Padding must be zero when the vector is empty for PACKED_BIT'
  elsif padding.negative? || padding > 7
    raise ArgumentError, "Padding must be between 0 and 7, got #{padding}"
  end
end
471+
private_class_method :validate_args_for_vector!
472+
473+
# Checks every element of the vector data against the value domain of the
# given dtype: integers in -128..127 for :int8, Float instances for
# :float32, and integers in 0..255 for :packed_bit.
#
# @param [ Array ] data The vector data.
# @param [ ::Symbol ] dtype The vector data type.
#
# @raise [ ArgumentError ] If dtype is unsupported, or on the first
#   element that is not valid for dtype.
def self.validate_vector_data!(data, dtype)
  int_in = ->(range) { ->(value) { value.is_a?(Integer) && value.between?(range.first, range.last) } }

  acceptable =
    case dtype
    when :int8 then int_in.call(-128..127)
    when :float32 then ->(value) { value.is_a?(Float) }
    when :packed_bit then int_in.call(0..255)
    else raise ArgumentError, "Unsupported type: #{dtype}"
    end

  data.each do |value|
    next if acceptable.call(value)

    raise ArgumentError, "Invalid value #{value} for type #{dtype}"
  end
end
492+
private_class_method :validate_vector_data!
493+
373494
# initializes an instance of BSON::Binary.
374495
#
375496
# @param [ String ] data the data to initialize the object with
@@ -398,7 +519,7 @@ def from_uuid_to_uuid(representation)
398519
if representation != :standard
399520
raise ArgumentError,
400521
'Binary of type :uuid can only be stringified to :standard representation, ' \
401-
"requested: #{representation.inspect}"
522+
"requested: #{representation.inspect}"
402523
end
403524

404525
data
@@ -490,7 +611,8 @@ def validate_type!(type)
490611
validate_integer_type!(type.bytes.first)
491612
end
492613
when Symbol then validate_symbol_type!(type)
493-
else raise BSON::Error::InvalidBinaryType, type
614+
else
615+
raise BSON::Error::InvalidBinaryType, type
494616
end
495617
end
496618

lib/bson/vector.rb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright (C) 2025-present MongoDB Inc.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
module BSON
  # Vector of numbers along with metadata for binary interoperability.
  #
  # The vector is itself the Array of elements; dtype and padding are
  # carried alongside as read-only metadata.
  class Vector < ::Array
    # @return [ Symbol ] The data type stored in the vector.
    attr_reader :dtype

    # @return [ Integer ] The number of bits in the final byte that are to
    #   be ignored when a vector element's size is less than a byte
    #   and the length of the vector is not a multiple of 8.
    attr_reader :padding

    # @return [ BSON::Vector ] The data in the vector, which is the vector
    #   itself.
    def data
      self
    end

    # @param [ ::Array ] data The data to initialize the vector with.
    # @param [ Symbol ] dtype The data type of the vector.
    # @param [ Integer ] padding The number of bits in the final byte that are to
    #   be ignored when a vector element's size is less than a byte
    #   and the length of the vector is not a multiple of 8.
    def initialize(data, dtype, padding)
      @dtype = dtype
      @padding = padding
      # Array.new copies the elements of an array argument, so the vector
      # does not alias the caller's array and no explicit dup is needed.
      super(data)
    end
  end
end

0 commit comments

Comments
 (0)