|
28 | 28 | namespace milvus { |
29 | 29 |
|
30 | 30 | SparseFloatVecFieldData::ElementT |
31 | | -DecodeSparseFloatVector(std::string& bytes) { |
| 31 | +DecodeSparseFloatVector(const std::string& bytes) { |
32 | 32 | if (bytes.size() % 8 != 0) { |
33 | 33 | throw std::runtime_error("Unexpected binary string is received from server side!"); |
34 | 34 | } |
@@ -65,8 +65,8 @@ BuildFieldDataSparseVectors(const google::protobuf::RepeatedPtrField<std::string |
65 | 65 | auto end = cursor; |
66 | 66 | std::advance(end, count); |
67 | 67 | while (cursor != end) { |
68 | | - std::string bytes = *cursor; |
69 | | - data.emplace_back(std::move(DecodeSparseFloatVector(bytes))); |
| 68 | + const std::string& bytes = *cursor; |
| 69 | + data.emplace_back(DecodeSparseFloatVector(bytes)); |
70 | 70 | cursor++; |
71 | 71 | } |
72 | 72 | return data; |
@@ -358,13 +358,21 @@ CreateMilvusFieldData(const proto::schema::FieldData& proto_data, size_t offset, |
358 | 358 | return Status::OK(); |
359 | 359 | } |
360 | 360 | case proto::schema::DataType::JSON: { |
361 | | - std::vector<nlohmann::json> objects; |
| 361 | + // Don't use BuildFieldDataScalars here: JSON requires json::parse() for each element, |
| 362 | + // so we parse only the [offset, end) range to avoid parsing all elements upfront. |
362 | 363 | const auto& scalars_data = proto_scalars.json_data().data(); |
363 | | - for (const auto& s : scalars_data) { |
364 | | - objects.emplace_back(std::move(nlohmann::json::parse(s))); |
| 364 | + auto total = static_cast<size_t>(scalars_data.size()); |
| 365 | + if (offset >= total) { |
| 366 | + field_data = std::make_shared<JSONFieldData>(std::move(name), std::vector<JSONFieldData::ElementT>{}, |
| 367 | + std::move(valid_data)); |
| 368 | + return Status::OK(); |
| 369 | + } |
| 370 | + size_t end = (offset + count > total) ? total : offset + count; |
| 371 | + std::vector<JSONFieldData::ElementT> values; |
| 372 | + values.reserve(end - offset); |
| 373 | + for (size_t i = offset; i < end; ++i) { |
| 374 | + values.emplace_back(nlohmann::json::parse(scalars_data[static_cast<int>(i)])); |
365 | 375 | } |
366 | | - std::vector<JSONFieldData::ElementT> values = |
367 | | - BuildFieldDataScalars<JSONFieldData::ElementT>(objects, offset, count); |
368 | 376 | field_data = std::make_shared<JSONFieldData>(std::move(name), std::move(values), std::move(valid_data)); |
369 | 377 | return Status::OK(); |
370 | 378 | } |
@@ -498,7 +506,7 @@ FillStructValue(const FieldDataPtr& array_data, std::vector<std::vector<nlohmann |
498 | 506 | for (auto k = 0; k < actual_count; k++) { |
499 | 507 | const auto& arr = actual_ptr->Value(k); |
500 | 508 | if (structs.size() <= k) { |
501 | | - structs.emplace_back(std::move(std::vector<nlohmann::json>())); |
| 509 | + structs.emplace_back(); |
502 | 510 | structs[k].resize(arr.size()); |
503 | 511 | } |
504 | 512 | for (auto j = 0; j < arr.size(); j++) { |
@@ -593,7 +601,7 @@ ConvertStructFieldData(const proto::schema::FieldData& proto_data, size_t offset |
593 | 601 | vector_field.dim() * 4, floats.data(), floats.size(), 0, floats.size()); |
594 | 602 | auto num = k - offset; |
595 | 603 | if (structs.size() <= num) { |
596 | | - structs.emplace_back(std::move(std::vector<nlohmann::json>())); |
| 604 | + structs.emplace_back(); |
597 | 605 | structs[num].resize(vectors.size()); |
598 | 606 | } |
599 | 607 | for (auto j = 0; j < vectors.size(); j++) { |
@@ -746,8 +754,7 @@ SetEmbeddingLists(const std::vector<EmbeddingList>& emb_lists, proto::milvus::Se |
746 | 754 | std::string content; |
747 | 755 | content.reserve(emb_list.Count() * emb_list.Dim() * 4); |
748 | 756 | for (const auto& vector : vectors.Data()) { |
749 | | - std::string single_content(reinterpret_cast<const char*>(vector.data()), vector.size() * sizeof(float)); |
750 | | - content += single_content; |
| 757 | + content.append(reinterpret_cast<const char*>(vector.data()), vector.size() * sizeof(float)); |
751 | 758 | } |
752 | 759 | rpc_request->set_nq(static_cast<int64_t>(emb_list.Count())); |
753 | 760 | placeholder_value.add_values(std::move(content)); |
@@ -798,8 +805,8 @@ GenGetter(const FieldDataPtr& field) { |
798 | 805 | // special process float16/bfloat16 vector to float arrays |
799 | 806 | if (field->Type() == DataType::FLOAT16_VECTOR || field->Type() == DataType::BFLOAT16_VECTOR) { |
800 | 807 | bool is_fp16 = (field->Type() == DataType::FLOAT16_VECTOR); |
801 | | - std::vector<uint16_t> f16_vec = is_fp16 ? std::static_pointer_cast<Float16VecFieldData>(field)->Value(i) |
802 | | - : std::static_pointer_cast<BFloat16VecFieldData>(field)->Value(i); |
| 808 | + const auto& f16_vec = is_fp16 ? std::static_pointer_cast<Float16VecFieldData>(field)->Data()[i] |
| 809 | + : std::static_pointer_cast<BFloat16VecFieldData>(field)->Data()[i]; |
803 | 810 | std::vector<float> f32_vec; |
804 | 811 | f32_vec.reserve(f16_vec.size()); |
805 | 812 | std::transform(f16_vec.begin(), f16_vec.end(), std::back_inserter(f32_vec), |
@@ -938,7 +945,7 @@ GenGetters(const std::vector<FieldDataPtr>& fields) { |
938 | 945 | break; |
939 | 946 | } |
940 | 947 | } |
941 | | - return std::move(getters); |
| 948 | + return getters; |
942 | 949 | } |
943 | 950 |
|
944 | 951 | Status |
|
0 commit comments