This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit e2cf414: "introduce unique ids" (1 parent: 6165e9d)

File tree: 3 files changed, +53 -5 lines

platform/model/typical_queries/hits.go (20 additions & 1 deletion)

@@ -3,7 +3,11 @@
 package typical_queries
 
 import (
+	"bytes"
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
 	"fmt"
 	"github.com/QuesmaOrg/quesma/platform/clickhouse"
 	"github.com/QuesmaOrg/quesma/platform/common_table"
@@ -241,7 +245,9 @@ func (query Hits) computeIdForDocument(doc model.SearchHit, defaultID string) st
 		// At database level we only compare timestamps with millisecond precision
 		// However in search results we append `q` plus generated digits (we use q because it's not in hex)
 		// so that kibana can iterate over documents in UI
-		pseudoUniqueId = fmt.Sprintf("%xq%s", vv, defaultID)
+		sourceHash := fmt.Sprintf("%x", HashJSON(doc.Source))
+		pseudoUniqueId = fmt.Sprintf("%xqqq%sqqq%x", vv, defaultID, sourceHash)
+		//pseudoUniqueId = fmt.Sprintf("%xq%s", vv, defaultID)
 	} else {
 		logger.WarnWithCtx(query.ctx).Msgf("failed to convert timestamp field [%v] to time.Time", v[0])
 		return defaultID
@@ -250,6 +256,19 @@ func (query Hits) computeIdForDocument(doc model.SearchHit, defaultID string) st
 	return pseudoUniqueId
 }
 
+func HashJSON(data json.RawMessage) string {
+	var buf bytes.Buffer
+	err := json.Compact(&buf, data)
+	if err != nil {
+		hash := sha256.Sum256(data)
+		return hex.EncodeToString(hash[:])
+	}
+	hash := sha256.Sum256(buf.Bytes())
+	eee := hex.EncodeToString(hash[:])
+	println(eee)
+	return hex.EncodeToString(hash[:])
+}
+
 func (query Hits) String() string {
 	return fmt.Sprintf("hits(indexes: %v)", strings.Join(query.indexes, ", "))
 }
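
This commit changes the pseudo-unique document id from `<hex(timestamp)>q<suffix>` to `<hex(timestamp)>qqq<suffix>qqq<hex(source hash)>`, so the id also pins down the document contents rather than only its millisecond-precision timestamp. Below is a minimal, self-contained sketch of that composition; the timestamp, counter, and document body are made up for illustration, and hashJSON mirrors the HashJSON helper added above (without its debug print):

package main

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"time"
)

// hashJSON mirrors HashJSON above: compact the JSON first so that formatting
// differences do not change the hash, then SHA-256 it and hex-encode the sum.
func hashJSON(data json.RawMessage) string {
	var buf bytes.Buffer
	if err := json.Compact(&buf, data); err != nil {
		sum := sha256.Sum256(data)
		return hex.EncodeToString(sum[:])
	}
	sum := sha256.Sum256(buf.Bytes())
	return hex.EncodeToString(sum[:])
}

func main() {
	// Hypothetical inputs: a hit's timestamp, its _source, and a per-result counter.
	ts := time.Date(2024, 5, 1, 12, 0, 0, 0, time.UTC)
	source := json.RawMessage(`{"message": "hello", "level": "info"}`)
	defaultID := "3"

	// Same composition as computeIdForDocument: %x on a time.Time hex-encodes
	// its String() form (time.Time implements fmt.Stringer), and the hash
	// segment is hex-encoded once more by the outer %x.
	sourceHash := fmt.Sprintf("%x", hashJSON(source))
	id := fmt.Sprintf("%xqqq%sqqq%x", ts, defaultID, sourceHash)
	fmt.Println(id)
}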

platform/parsers/elastic_query_dsl/query_parser.go (5 additions & 2 deletions)

@@ -34,7 +34,7 @@ func NewEmptyHighlighter() model.Highlighter {
 }
 
 const (
-	defaultQueryResultSize = 10
+	defaultQueryResultSize = 10000 // TODO looks like we can NOT limit the returned `hits` because we calculate IDs there
 	defaultTrackTotalHits  = 10000
 )
 
@@ -322,6 +322,7 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) model.SimpleQue
 		return model.NewSimpleQueryInvalid()
 	}
 	ids := make([]string, 0, len(idsRaw))
+	uniqueIds := make([]string, 0, len(idsRaw)) // to avoid duplicates
 	for _, id := range idsRaw {
 		if idAsString, ok := id.(string); ok {
 			ids = append(ids, idAsString)
@@ -335,14 +336,15 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) model.SimpleQue
 	// therefore we need to strip the hex part (before `q`) and convert it to decimal
 	// then we can query at DB level
 	for i, id := range ids {
-		idInHex := strings.Split(id, "q")[0]
+		idInHex := strings.Split(id, "qqq")[0]
 		if idAsStr, err := hex.DecodeString(idInHex); err != nil {
 			logger.Error().Msgf("error parsing document id %s: %v", id, err)
 			return model.NewSimpleQueryInvalid()
 		} else {
 			tsWithoutTZ := strings.TrimSuffix(string(idAsStr), " +0000 UTC")
 			ids[i] = fmt.Sprintf("'%s'", tsWithoutTZ)
 		}
+		uniqueIds = append(uniqueIds, id)
 	}
 
 	var idToSql func(string) (model.Expr, error)
@@ -399,6 +401,7 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) model.SimpleQue
 		idsTuple := model.NewTupleExpr(idsAsExprs...)
 		whereStmt = model.NewInfixExpr(model.NewColumnRef(timestampColumnName), " IN ", idsTuple)
 	}
+	cw.UniqueIDs = uniqueIds // TODO a crucial side effect here
 	return model.NewSimpleQuery(whereStmt, true)
 }
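
On the read path, parseIds undoes the first segment of that id: below is a sketch, assuming an id of the shape produced by computeIdForDocument, of how the hex-encoded timestamp is recovered and formatted for the database-level `IN` comparison (the counter and hash segment in the sample id are hypothetical):

package main

import (
	"encoding/hex"
	"fmt"
	"strings"
	"time"
)

func main() {
	// Build a sample id the same way computeIdForDocument does
	// (segments: hex timestamp, counter, source hash).
	ts := time.Date(2024, 5, 1, 12, 0, 0, 0, time.UTC)
	id := fmt.Sprintf("%xqqq%sqqq%s", ts, "3", "deadbeef") // hash segment shortened for the example

	// parseIds: everything before the first "qqq" is the hex-encoded timestamp string.
	idInHex := strings.Split(id, "qqq")[0]
	decoded, err := hex.DecodeString(idInHex)
	if err != nil {
		fmt.Println("error parsing document id:", err)
		return
	}

	// Drop the timezone suffix and quote it, ready for the `timestamp IN (...)` clause.
	tsWithoutTZ := strings.TrimSuffix(string(decoded), " +0000 UTC")
	fmt.Printf("'%s'\n", tsWithoutTZ) // prints: '2024-05-01 12:00:00'
}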

platform/parsers/elastic_query_dsl/query_translator.go (28 additions & 2 deletions)

@@ -11,6 +11,8 @@ import (
 	"github.com/QuesmaOrg/quesma/platform/parsers/elastic_query_dsl/query_util"
 	"github.com/QuesmaOrg/quesma/platform/schema"
 	"github.com/QuesmaOrg/quesma/platform/util"
+	"slices"
+	"strings"
 )
 
 type JsonMap = map[string]interface{}
@@ -24,7 +26,8 @@ type ClickhouseQueryTranslator struct {
 	Indexes []string
 
 	// TODO this will be removed
-	Table *clickhouse.Table
+	Table     *clickhouse.Table
+	UniqueIDs []string // used for hits queries, to filter out hits that are not in the list of IDs
 }
 
 var completionStatusOK = func() *int { value := 200; return &value }()
@@ -130,8 +133,31 @@ func (cw *ClickhouseQueryTranslator) makeHits(queries []*model.Query, results []
 	}
 	hitsPartOfResponse := hitsQuery.Type.TranslateSqlResponseToJson(hitsResultSet)
 
+	// trim hits
+
 	hitsResponse := hitsPartOfResponse["hits"].(model.SearchHits)
-	return queriesWithoutHits, resultsWithoutHits, &hitsResponse
+	hits := cw.RemoveHitsIfDocHashesSet(hitsResponse)
+	return queriesWithoutHits, resultsWithoutHits, &hits
+}
+
+func (cw *ClickhouseQueryTranslator) RemoveHitsIfDocHashesSet(hits model.SearchHits) model.SearchHits {
+	// if we have doc hashes set, we need to remove hits from the response
+	if len(cw.UniqueIDs) == 0 {
+		return hits
+	}
+	docHashes := make([]string, 0, len(cw.UniqueIDs))
+	for _, id := range cw.UniqueIDs {
+		docHashes = append(docHashes, strings.Split(id, "qqq")[2])
+	}
+	filteredHits := make([]model.SearchHit, 0, len(hits.Hits))
+	for _, hit := range hits.Hits {
+		hashOfOfDocInHit := strings.Split(hit.ID, "qqq")[2]
+		if slices.Contains(docHashes, hashOfOfDocInHit) {
+			filteredHits = append(filteredHits, hit)
+		}
+	}
+	hits.Hits = filteredHits
+	return hits
 }
 
 func (cw *ClickhouseQueryTranslator) makeTotalCount(queries []*model.Query, results [][]model.QueryResultRow) (queriesWithoutCount []*model.Query, resultsWithoutCount [][]model.QueryResultRow, total *model.Total) {
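
The filtering step can be exercised in isolation: below is a sketch, using hypothetical ids with shortened hash segments, of the matching that RemoveHitsIfDocHashesSet performs, keeping only hits whose third `qqq` segment (the source hash) also appears in the requested ids:

package main

import (
	"fmt"
	"slices"
	"strings"
)

func main() {
	// Ids the client asked for (as stored in cw.UniqueIDs) and ids of the hits
	// returned by the broadened timestamp query; all values are made up.
	uniqueIDs := []string{"<ts1>qqq1qqqaaaa", "<ts2>qqq2qqqbbbb"}
	hitIDs := []string{"<ts1>qqq1qqqaaaa", "<ts1>qqq7qqqcccc", "<ts2>qqq2qqqbbbb"}

	// Collect the hash segment (index 2 after splitting on "qqq") of each requested id.
	docHashes := make([]string, 0, len(uniqueIDs))
	for _, id := range uniqueIDs {
		docHashes = append(docHashes, strings.Split(id, "qqq")[2])
	}

	// Keep only hits whose hash segment was requested.
	filtered := make([]string, 0, len(hitIDs))
	for _, id := range hitIDs {
		if slices.Contains(docHashes, strings.Split(id, "qqq")[2]) {
			filtered = append(filtered, id)
		}
	}
	fmt.Println(filtered) // prints: [<ts1>qqq1qqqaaaa <ts2>qqq2qqqbbbb]
}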
