Description
When I create an Azure AI Search index with a CustomVectorizer and then run a query, an HttpResponseError occurs.
The CustomVectorizer points to my custom E5 embedding model, which is deployed as an Azure Machine Learning endpoint.
The Azure Machine Learning endpoint logs show the request completing with 200 OK.
However, performing a search returns the following error:
HttpResponseError: () Could not vectorize the query because the vectorization endpoint response is invalid.
Code:
Message: Could not vectorize the query because the vectorization endpoint response is invalid.
File <command-8161632147394724>, line 5
1 from itertools import tee
3 results, results_backup = tee(results)
----> 5 for i, r in enumerate(results):
6 print(r)
8 results_backup
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-a4d14cee-3610-4575-b83e-16f37fffb48a/lib/python3.12/site-packages/azure/search/documents/_generated/operations/_documents_operations.py:778, in DocumentsOperations.search_post(self, search_request, request_options, **kwargs)
776 map_error(status_code=response.status_code, response=response, error_map=error_map)
777 error = self._deserialize.failsafe_deserialize(_models.ErrorResponse, pipeline_response)
--> 778 raise HttpResponseError(response=response, model=error)
780 deserialized = self._deserialize("SearchDocumentsResult", pipeline_response)
782 if cls:
https://github.com/Azure/azure-search-vector-samples/blob/main/demo-python/code/custom-vectorizer/scripts/setup_search_service.py
My code, written and executed by referring to the setup_search_service.py sample above, is as follows:
import json
import os  # needed for os.path.join below

from azure.core.pipeline.policies import HTTPPolicy
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SimpleField,
SearchFieldDataType,
SearchableField,
SearchField,
VectorSearch,
HnswAlgorithmConfiguration,
VectorSearchProfile,
SemanticConfiguration,
SemanticSearch,
SemanticField,
SemanticPrioritizedFields,
SearchIndex,
CustomVectorizer,
CustomWebApiParameters
)
# Workaround required to use the preview SDK
class CustomVectorizerRewritePolicy(HTTPPolicy):
    def send(self, request):
        # Rename the serialized vectorizer parameters key to the name the
        # preview REST API expects before the request goes out.
        request.http_request.body = request.http_request.body.replace('customVectorizerParameters', 'customWebApiParameters')
        return self.next.send(request)
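For clarity, the policy above only renames one JSON key in the serialized index definition before the request is sent. A minimal illustration of what it does (the body shown here is abbreviated and hypothetical; the real body is the full index definition produced by the SDK):

# Abbreviated, hypothetical request body before the policy runs.
body = '{"vectorizers": [{"name": "customVectorizer", "customVectorizerParameters": {}}]}'
# The policy rewrites the key to the name the preview REST API expects.
fixed = body.replace("customVectorizerParameters", "customWebApiParameters")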
# Create a search index
# https://learn.microsoft.com/en-us/python/api/azure-search-documents/azure.search.documents.indexes.searchindexclient?view=azure-python
index_client = SearchIndexClient(endpoint=endpoint, credential=credential, per_call_policies=[CustomVectorizerRewritePolicy()])
fields = [
SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
SearchableField(name="title", type=SearchFieldDataType.String),
SearchableField(name="content", type=SearchFieldDataType.String),
SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
SearchField(name="titleVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
]
headers = {'Authorization': 'Bearer ' + aml_endpoint_key}
# Configure the vector search configuration
# HNSW: Hierarchical Navigable Small World algorithm
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="myHnsw"
),
],
profiles=[
VectorSearchProfile(
name="myHnswProfile",
algorithm_configuration_name="myHnsw",
# vectorizer_name="endpt-kt-embeddings-integrate"
vectorizer="customVectorizer",
),
],
vectorizers=[
CustomVectorizer(
name="customVectorizer",
custom_web_api_parameters=CustomWebApiParameters(
uri=aml_endpoint_url,
http_headers=headers,
http_method="POST",
)
)
]
)
# Create the search index (no semantic settings are configured here)
index = SearchIndex(name=index_name, fields=fields,
vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f'{result.name} created')
# Upload some documents to the index
output_path = os.path.join("../data", "docVectors-kse-2.json")
with open(output_path, 'r') as file:
documents = json.load(file)
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
result = search_client.upload_documents(documents)
print(f"Uploaded {len(documents)} documents")
# Perform a text similarity search
from azure.search.documents.models import VectorizableTextQuery
query = "tools for software development"
search_client = SearchClient(endpoint, index_name, credential=credential)
vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=2, fields="contentVector")
results = search_client.search(
search_text=None,
vector_queries= [vector_query],
select=["title", "content", "category"],
top=1,
include_total_count=True,
)
from itertools import tee
results, results_backup = tee(results)
print("**Print All**")
# The HttpResponseError above is raised while iterating over the results
for i, r in enumerate(results):
    print(r)

print("**Print Key Result**")
for result in results_backup:
    print(f"Title: {result['title']}")
    print(f"Score: {result['@search.score']}")
    print(f"Content: {result['content']}")
    print(f"Category: {result['category']}\n")
https://github.com/Azure-Samples/azure-search-power-skills/blob/main/Common/WebAPISkillContract.cs
The score.py (the scoring script behind the embedding model endpoint), written by referring to the WebAPISkillContract.cs file above, is as follows:
# json, logger, model, and EMBEDDING_FORMAT are assumed to be set up at
# module level / in init() of score.py.
def run(raw_data):
    logger.debug("raw_data: %s", raw_data)
    input_data = json.loads(raw_data)
    input_values = input_data["values"]
    output = {"values": []}
    for input_value in input_values:
        value_dic = {}
        if "text" in input_value["data"]:
            value_dic["recordId"] = input_value["recordId"]
            value_dic["data"] = {}
            try:
                text_ndarr = model.encode(
                    EMBEDDING_FORMAT.format(prefix="UNUSED0002", text=input_value["data"]["text"])
                )
                value_dic["data"]["hitPositions"] = text_ndarr.tolist()
                value_dic["errors"] = None
                value_dic["warnings"] = None
            except Warning as w:
                logger.debug("warning to encode: %s", w)
                # text_ndarr is not defined if encode() raised, so only the
                # warning is reported. Per the Web API skill contract,
                # warnings/errors are lists of objects with a "message" field.
                value_dic["errors"] = None
                value_dic["warnings"] = [{"message": str(w)}]
            except Exception as e:
                logger.debug("fail to encode: %s", e)
                value_dic["errors"] = [{"message": str(e)}]
                value_dic["warnings"] = None
        output["values"].append(value_dic)
    logger.debug("output: %s", output)
    return json.dumps(output)
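For comparison, my understanding based on the custom-vectorizer sample linked at the top is that the vectorizer expects the embedding under a "vector" key in data, not "hitPositions"; a response with a different key would be rejected as invalid. A minimal sketch of that expected response shape (recordId and embedding values are placeholders):

# Response shape the custom Web API vectorizer appears to expect,
# based on the custom-vectorizer sample linked above: the embedding
# is returned under a "vector" key. Values here are placeholders.
expected_response = {
    "values": [
        {
            "recordId": "0",
            "data": {"vector": [0.01, 0.02, 0.03]},  # 1536 floats in practice
            "errors": None,
            "warnings": None,
        }
    ]
}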