Skip to content

highlights is getting dropped in DocumentSerializer #311

Open
@umaparvat

Description

@umaparvat

Hi All,

I'm using the django-elasticsearch-dsl-drf DocumentSerializer to serialize Elasticsearch documents. The requirement is to show the text that matched the query in the response, so I used the highlight option on the django-elasticsearch-dsl Search() in views.py. I can see the highlights in the Elasticsearch response, but when the response goes through the serializer, the highlight key and value are dropped.

`
class IncidentDocumentSerializer(DocumentSerializer):
    """Serialize ``IncidentDocument`` hits together with their highlight fragments."""

    # Computed per hit by ``get_highlights`` below.
    highlights = serializers.SerializerMethodField()

    class Meta:
        document = IncidentDocument

        fields = (
            'incident_number',
            'incident_state',
            'site_id',
            'sub_site_id',
            'account_id',
            'sub_account_id',
            'closed_at',
            'state',
            'short_description',
            'description',
            'incident_parent_id',
        )

    def get_highlights(self, obj):
        """Return the highlight fragments of ``obj`` as a plain dict.

        elasticsearch-dsl stores the ``highlight`` section of a hit on
        ``obj.meta.highlight``; the document has no ``highlights`` attribute,
        which is why the previous lookup always failed.

        Returns an empty dict when the hit carries no highlight data.
        """
        try:
            # The result must be returned; previously it was computed and
            # discarded, so the field could never carry a value.
            return obj.meta.highlight.to_dict()
        except AttributeError:
            # No ``meta`` or no ``highlight`` on this hit: nothing matched
            # a highlighted field.
            return {}

`

views.py
`
class PaginatedElasticSearchAPIView(APIView, LimitOffsetPagination):
    """Run a highlighted Elasticsearch query and return a paginated response.

    The query, document class and serializer are all taken from ``self.ins``,
    which must be set to a search-strategy object before ``get`` is called.
    """

    serializer_class = None
    document_class = None
    # Search-strategy object providing ``generate_q_expressions``,
    # ``document_class`` and ``serializer_class``.
    ins = None

    # NOTE(review): ``abc.abstractmethod`` is only enforced when the class's
    # metaclass derives from ``abc.ABCMeta`` - confirm that is the case here,
    # otherwise this decorator has no effect.
    @abc.abstractmethod
    def generate_q_expressions(self, query):
        """Build the query expression for ``query``; not implemented here."""
        raise NotImplementedError

    def get_search_instance(self, query):
        """Return a search on ``self.ins.document_class`` for ``query``.

        Highlighting is requested for every field (``"*"``) and the result
        size is raised to 10000.
        """
        q = self.ins.generate_q_expressions(query)
        search = (
            self.ins.document_class.search()
            .highlight("*", require_field_match=False)
            .extra(size=10000)
            .query(q)
        )
        return search

    def get(self, request, query):
        """Execute the search and return the paginated, serialized hits.

        Any exception is printed and converted into an HTTP 500 response
        whose body is the exception.
        """
        try:
            search = self.get_search_instance(query)
            response = search.execute()
            for ind, each_data in enumerate(response.hits.hits):
                # Elasticsearch omits the "highlight" section for hits where
                # no highlighted field matched; skip those instead of failing
                # the whole request with an AttributeError.
                highlight = getattr(each_data, "highlight", None)
                if highlight is None:
                    continue
                for key in highlight:
                    # NOTE(review): this writes the first fragment onto the
                    # raw hit mapping, next to "_source", not into it - it
                    # may never reach the objects handed to the serializer.
                    # TODO confirm.
                    response.hits.hits[ind][key] = highlight[key][0]

            results = self.paginate_queryset(response, request, view=self)
            serializer = self.ins.serializer_class(results, many=True)

            return self.get_paginated_response(serializer.data)
        except Exception as e:
            print("exce", e)
            return HttpResponse(e, status=500)

class SearchFactory:
    """Create the search-strategy object that matches a requested type."""

    def get_ins(self, type):
        """Return a new strategy instance for ``type``.

        ``"incident"``, ``"case"`` and ``"tac"`` select their dedicated
        strategies; every other value yields ``SearchAll``.
        """
        if type == "incident":
            return SearchIncidents()
        if type == "case":
            return SearchCases()
        if type == "tac":
            return SearchTacSupportCase()
        # Unknown or missing type: use the catch-all strategy.
        return SearchAll()

class GlobalSearch(PaginatedElasticSearchAPIView):
    """Search endpoint that picks its strategy from the ``type`` query parameter."""

    def get(self, request, query):
        """Select the strategy for this request, then delegate to the base ``get``."""
        # With no ``type`` parameter the lookup value is "empty", which the
        # factory maps to its fallback strategy.
        self.ins = SearchFactory().get_ins(request.GET.get("type", "empty"))
        return super().get(request, query)

class SearchIncidents:
    """Search strategy for incident documents."""

    serializer_class = IncidentDocumentSerializer
    document = IncidentDocument
    document_class = IncidentDocument

    def generate_q_expressions(self, query):
        """Return a fuzzy ``multi_match`` query for ``query`` over the incident fields."""
        return Q(
            'multi_match',
            query=query,
            fields=[
                'incident_number',
                # The comma after this entry was missing, so Python joined it
                # with the next literal into 'incident_parent_idurgency' and
                # neither field was searched.
                'incident_parent_id',
                'urgency',
                'impact',
                'account_id',
                'site_id',
                'short_description',
                'description',
                'sub_site_id',
            ],
            fuzziness='auto',
        )

`

I have to replace each document field value with the corresponding highlight value myself, because the serializer drops the highlight field. This has a performance impact. The relevant code is the loop in `get()` in views.py.

So I modified the response object, replacing each document field with the corresponding highlight value. Even then, the value that contains the `<em>` tag comes back as plain text.

Before serialization I printed the value to check whether the document field had been replaced with the highlight value; it contains the tag:
Device <em>AP</em> AP1_812 on Controller AP Noise Floor in the last 15 mins

After serialization, the output shows:
Device AP AP1_812 on Controller AP Noise Floor in the last 15 mins

the API response from Elastic search for your reference.

Elastic search query:
GET itsm_incidents/_search { "query": { "multi_match" : { "query": "AP SP", "fields": [ "incident_number", "description", "short_description" , "incident_number.suggest"] } }, "track_total_hits": true, "highlight": { "require_field_match": false, "fields": { "*": {} } } }
Elastic Search response.
{ "_index": "itsm_incidents", "_id": "INC0061201", "_score": 2.7809696, "_source": { "incident_number": "INC0061201", "incident_parent_id": {}, "incident_state": "In Progress", "site_id": "ACCT0022395", "sub_site_id": "ACCT0022410", "account_id": "ACCT0022391", "sub_account_id": "ACCT0022394", "closed_at": null, "state": "In Progress", "short_description": "ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap", "description": "Memory utilization for AP MSAN-AP-1 with serial CNG2CW has been above 1% for about 5 minutes since 2022-05-24 00:07:07 UTC" }, "highlight": { "short_description": [ "<em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization", "<em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em>" ], "description": [ "Memory utilization for <em>AP</em> MSAN-<em>AP</em>-1 with serial CNG2CW has been above 1% for about 5 minutes" ] } },

  1. Why is the highlight being dropped?
  2. Why is the tag in the text field dropped when using DocumentSerializer?
  3. How can I replace the document fields with the matching highlight values in the serializer?

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions