Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
0e25749
some done
trzysiek Oct 15, 2024
622c4cc
Merge branch 'main' into persistent-storage-2
trzysiek Nov 5, 2024
5928f10
Some more done
trzysiek Nov 5, 2024
3661361
Before reverse to 1 index
trzysiek Nov 8, 2024
56245b8
some more
trzysiek Nov 10, 2024
29679c3
Almost
trzysiek Nov 11, 2024
7656f8b
99% done, need to debug tests.
trzysiek Nov 11, 2024
49956b0
Some more
trzysiek Nov 11, 2024
144042a
All tests pass, maybe it works?
trzysiek Nov 11, 2024
a2c621e
Linter
trzysiek Nov 11, 2024
576b195
Minor, now should be all
trzysiek Nov 11, 2024
532f39e
Merge branch 'main' into persistent-storage-2
trzysiek Nov 11, 2024
47229e0
Fix linter
trzysiek Nov 11, 2024
47cc725
Fix config data for smoke test
trzysiek Nov 11, 2024
385fcb3
Debug for smoke test
trzysiek Nov 12, 2024
c37e8e3
Some fixes for smoke test
trzysiek Nov 12, 2024
aa72974
Merge branch 'main' into persistent-storage-2
trzysiek Nov 12, 2024
57982de
unskip all tests
trzysiek Nov 12, 2024
821b872
Reskip tests
trzysiek Nov 12, 2024
a96bd11
Style
trzysiek Nov 12, 2024
3921080
small add test
trzysiek Nov 12, 2024
eb17c02
Merge branch 'main' into persistent-storage-2
trzysiek Dec 20, 2024
b4ef499
Merge branch 'main' into persistent-storage-2
trzysiek Dec 20, 2024
45239e3
Cleanup
trzysiek Dec 20, 2024
51b1e68
Cleanup 2
trzysiek Dec 20, 2024
89a5180
Fix all tests
trzysiek Dec 20, 2024
dc40a90
Fix linter
trzysiek Dec 20, 2024
ffdc79e
Final if manual test passes
trzysiek Dec 20, 2024
cb402dc
Last: 1 name
trzysiek Dec 20, 2024
9afe54a
merge main
nablaone Feb 12, 2025
1499a49
Linter
nablaone Feb 12, 2025
a72f606
Unify error handling
nablaone Feb 12, 2025
6a72ca4
Remove gorountines. Simplification
nablaone Feb 12, 2025
96e9fa6
fmt
nablaone Feb 12, 2025
44b337c
Fix error handling
nablaone Feb 12, 2025
1abc897
Merge branch 'main' into persistent-storage-2
Mar 11, 2025
8419c0e
Fix compilation after merge
Mar 11, 2025
161407b
Merge branch 'main' into persistent-storage-2
trzysiek May 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions quesma/persistence/elastic.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,10 @@ func (p *ElasticJSONDatabase) List() ([]string, error) {
}`

resp, err := p.httpClient.Request(context.Background(), "GET", elasticsearchURL, []byte(query))

defer resp.Body.Close()
if err != nil {
return nil, err
}
defer resp.Body.Close()

jsonAsBytes, err := io.ReadAll(resp.Body)
if err != nil {
Expand All @@ -142,7 +141,7 @@ func (p *ElasticJSONDatabase) List() ([]string, error) {
var ids []string
// Unmarshal the JSON response
var result map[string]interface{}
if err := json.Unmarshal(jsonAsBytes, &result); err != nil {
if err = json.Unmarshal(jsonAsBytes, &result); err != nil {
log.Fatalf("Error parsing the response JSON: %s", err)
}

Expand Down
150 changes: 150 additions & 0 deletions quesma/persistence/elastic_with_eviction.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright Quesma, licensed under the Elastic License 2.0.
// SPDX-License-Identifier: Elastic-2.0
package persistence

import (
"bytes"
"context"
"encoding/gob"
"encoding/json"
"fmt"
"io"
"net/http"
"quesma/logger"
"quesma/quesma/config"
)

// MAX_DOC_COUNT is the "size" value placed in the search query built by SizeInBytes.
// TODO(prototype): fix this value or make it configurable.
const MAX_DOC_COUNT = 10000

// ElasticDatabaseWithEviction combines an ElasticJSONDatabase with a size limit
// and an eviction strategy.
// So far the entire row is serialized and kept as a single string in ES.
type ElasticDatabaseWithEviction struct {
ctx context.Context // passed to the logger calls made by this type's methods
*ElasticJSONDatabase // maybe remove and copy fields here
EvictorInterface // provides SelectToEvict
sizeInBytesLimit int64 // returned by SizeInBytesLimit; Put rejects rows that would exceed it
}

// NewElasticDatabaseWithEviction builds an ElasticDatabaseWithEviction backed by
// a new ElasticJSONDatabase for indexName, using the default Evictor.
//
// ctx is stored and used for the logger calls made by the returned value's
// methods. sizeInBytesLimit is the limit later reported by SizeInBytesLimit.
func NewElasticDatabaseWithEviction(ctx context.Context, cfg config.ElasticsearchConfiguration, indexName string, sizeInBytesLimit int64) *ElasticDatabaseWithEviction {
	return &ElasticDatabaseWithEviction{
		// ctx was previously accepted but never stored, leaving db.ctx nil.
		ctx:                 ctx,
		ElasticJSONDatabase: NewElasticJSONDatabase(cfg, indexName),
		EvictorInterface:    &Evictor{},
		sizeInBytesLimit:    sizeInBytesLimit,
	}
}

// Put serializes row and stores it under id.
//
// It returns false when the current database size cannot be determined, when
// adding row would exceed SizeInBytesLimit, when serialization fails, or when
// the underlying ElasticJSONDatabase.Put fails; otherwise it returns true.
//
// TODO: decide whether a mutex is needed; the size check and the write are
// separate calls.
func (db *ElasticDatabaseWithEviction) Put(id string, row Sizeable) bool {
	// SizeInBytes returns two values, so it cannot be used directly inside an expression.
	currentSize, ok := db.SizeInBytes()
	if !ok {
		logger.WarnWithCtx(db.ctx).Msgf("Error getting database size, id: %s", id)
		return false
	}

	limit := db.SizeInBytesLimit()
	bytesNeeded := currentSize + row.SizeInBytes()
	if bytesNeeded > limit {
		logger.InfoWithCtx(db.ctx).Msg("Database is full, evicting documents")
		// Eviction is not wired in yet:
		//docsToEvict, bytesEvicted := db.SelectToEvict(db.getAll(), bytesNeeded-db.SizeInBytesLimit())
		//db.evict(docsToEvict)
		//bytesNeeded -= bytesEvicted
	}
	if bytesNeeded > limit {
		// Still over the limit, so the document is rejected.
		return false
	}

	serialized, err := db.serialize(row)
	if err != nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error serializing document, id: %s, error: %v", id, err)
		return false
	}

	if err = db.ElasticJSONDatabase.Put(id, serialized); err != nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error putting document, id: %s, error: %v", id, err)
		return false
	}

	return true
}

// Get fetches the stored string for id from the embedded ElasticJSONDatabase.
// When the lookup returns an error, a warning is logged and ("", false) is
// returned; otherwise the value and the found flag are passed through.
//
// Open question: what should this return? Maybe switch on both types we
// currently have; probably change the return type to *Sizeable.
func (db *ElasticDatabaseWithEviction) Get(id string) (string, bool) {
	doc, found, err := db.ElasticJSONDatabase.Get(id)
	if err == nil {
		return doc, found
	}
	logger.WarnWithCtx(db.ctx).Msgf("Error getting document, id: %s, error: %v", id, err)
	return "", false
}

// Delete is currently an empty placeholder: it does nothing with id.
// The comments in the body describe the intended behavior, which is not implemented yet.
func (db *ElasticDatabaseWithEviction) Delete(id string) {
// mark as deleted, don't actually delete
// (single document deletion is hard in ES, it's done by evictor for entire index)
}

// DocCount asks Elasticsearch for the total number of documents in the index.
//
// It returns (count, true) on success. If the request fails, the body cannot
// be read, the status is not 200 OK, or the body has no numeric
// hits.total.value, it logs a warning and returns (0, false).
func (db *ElasticDatabaseWithEviction) DocCount() (count int, success bool) {
	// TODO: add WHERE not_deleted

	// Request only the total hit count: "size": 0 means no hits are returned.
	elasticsearchURL := fmt.Sprintf("%s/_search", db.indexName)
	query := `{
"_source": false,
"size": 0,
"track_total_hits": true
}`

	resp, err := db.httpClient.Request(context.Background(), "GET", elasticsearchURL, []byte(query))
	if err != nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error requesting document count: %v", err)
		return 0, false
	}
	// Deferred only after the error check, so resp is never dereferenced when the request failed.
	defer resp.Body.Close()

	jsonAsBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error reading document count response: %v", err)
		return 0, false
	}

	if resp.StatusCode != http.StatusOK {
		logger.WarnWithCtx(db.ctx).Msgf("failed to get from elastic: %s, response status code: %v", string(jsonAsBytes), resp.StatusCode)
		return 0, false
	}

	// Decode only the field we need. The pointer distinguishes "missing" from 0,
	// and a differently shaped body becomes an Unmarshal error rather than a panic.
	var result struct {
		Hits struct {
			Total struct {
				Value *float64 `json:"value"`
			} `json:"total"`
		} `json:"hits"`
	}
	if err = json.Unmarshal(jsonAsBytes, &result); err != nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error parsing the response JSON: %s", err)
		return 0, false
	}

	total := result.Hits.Total.Value
	if total == nil {
		logger.WarnWithCtx(db.ctx).Msgf("Error parsing the response JSON: hits.total.value is missing in %s", string(jsonAsBytes))
		return 0, false
	}
	return int(*total), true
}

// SizeInBytes is unfinished: it only builds the search URL and a query body.
// NOTE(review): neither local is used and nothing is returned, so this does not
// compile as written. The request and the size computation still need to be added.
// NOTE(review): Put uses the result of this method as a single int64 — reconcile
// with the two-value signature here.
func (db *ElasticDatabaseWithEviction) SizeInBytes() (sizeInBytes int64, success bool) {
elasticsearchURL := fmt.Sprintf("%s/_search", db.indexName)

// Build the query to get only document IDs ("_source": false, up to MAX_DOC_COUNT hits)
query := fmt.Sprintf(`{"_source": false, "size": %d}`, MAX_DOC_COUNT)
}

// SizeInBytesLimit reports the byte limit this database was configured with.
func (db *ElasticDatabaseWithEviction) SizeInBytesLimit() int64 {
	limit := db.sizeInBytesLimit
	return limit
}

// getAll is a placeholder: it sends no query and always returns nil.
// NOTE(review): it returns a single *basicDocumentInfo, while SelectToEvict takes
// []*basicDocumentInfo — confirm the intended return type.
func (db *ElasticDatabaseWithEviction) getAll() *basicDocumentInfo {
// send query
return nil
}

// evict is an empty placeholder: it currently does nothing with documents.
func (db *ElasticDatabaseWithEviction) evict(documents []*basicDocumentInfo) {

}

// serialize gob-encodes row and returns the encoded bytes as a string.
//
// On failure it returns an empty string and the encoder's error wrapped with
// context. It does not print or log the error itself; the caller decides how
// to report it.
func (db *ElasticDatabaseWithEviction) serialize(row Sizeable) (serialized string, err error) {
	var buf bytes.Buffer

	// A fresh encoder per call keeps every result self-contained: a gob stream
	// transmits type information once per encoder, so output from a shared
	// encoder could not be decoded on its own.
	if err = gob.NewEncoder(&buf).Encode(row); err != nil {
		return "", fmt.Errorf("gob-encoding document: %w", err)
	}

	return buf.String(), nil
}
18 changes: 18 additions & 0 deletions quesma/persistence/evictor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright Quesma, licensed under the Elastic License 2.0.
// SPDX-License-Identifier: Elastic-2.0
package persistence

type EvictorInterface interface {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not used. We may delete it.

SelectToEvict(documents []*basicDocumentInfo, sizeNeeded int64) (evictThese []*basicDocumentInfo, bytesEvicted int64)
}

// Evictor is the only EvictorInterface implementation so far. It looks well
// suited for ElasticSearch, but the interface can be implemented differently.
type Evictor struct{}

// SelectToEvict is meant to choose, from documents, a set whose removal frees
// at least sizeNeeded bytes, and to report how many bytes that set frees.
//
// The selection policy is not implemented yet: the function currently selects
// nothing and reports 0 bytes evicted for every input, so callers must treat
// the requested space as not freed.
func (e *Evictor) SelectToEvict(documents []*basicDocumentInfo, sizeNeeded int64) (evictThese []*basicDocumentInfo, bytesEvicted int64) {
	if sizeNeeded <= 0 {
		// Nothing needs to be freed.
		return nil, 0
	}

	// TODO: implement the selection policy. The function previously ended
	// without a return statement, which does not compile.
	return nil, 0
}
23 changes: 23 additions & 0 deletions quesma/persistence/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// SPDX-License-Identifier: Elastic-2.0
package persistence

import "time"

// JSONDatabase is an interface for a database that stores JSON data.
// Treat it as `etcd` equivalent rather than `MongoDB`.
// The main usage is to store our configuration data, like
Expand All @@ -15,3 +17,24 @@ type JSONDatabase interface {
Get(key string) (string, bool, error)
Put(key string, data string) error
}

// JSONDatabaseWithEviction describes a document store with a size limit.
// NOTE(review): an earlier remark here mentioned "T - type of the data to store,
// e.g. async_search_storage.AsyncRequestResult", but this interface declares no type parameter.
// NOTE(review): ElasticDatabaseWithEviction declares Put(id string, row Sizeable) bool,
// Get(id string) (string, bool) and two-value DocCount/SizeInBytes, which do not
// match the signatures below — reconcile before relying on this interface.
type JSONDatabaseWithEviction interface { // for sure JSON? maybe not only json? check
Put(row *Sizeable) error
Get(id string) (*Sizeable, bool)
Delete(id string)
DocCount() int
SizeInBytes() int64
SizeInBytesLimit() int64
}

// basicDocumentInfo is the per-document metadata that
// EvictorInterface.SelectToEvict receives and returns.
type basicDocumentInfo struct {
id string
sizeInBytes int64
timestamp time.Time
markedAsDeleted bool // presumably set instead of physically deleting the document — TODO confirm
}

// Sizeable is implemented by values that can report their own size in bytes.
type Sizeable interface {
SizeInBytes() int64
}
11 changes: 10 additions & 1 deletion quesma/quesma/async_search_storage/in_memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,19 @@ func (s AsyncSearchStorageInMemory) Delete(id string) {
s.idToResult.Delete(id)
}

func (s AsyncSearchStorageInMemory) Size() int {
// DocCount reports the number of entries held in the underlying idToResult map.
func (s AsyncSearchStorageInMemory) DocCount() int {
	entries := s.idToResult.Size()
	return entries
}

// SizeInBytes sums the lengths of the response bodies of all entries visited by Range.
func (s AsyncSearchStorageInMemory) SizeInBytes() int {
	var total int
	s.Range(func(_ string, result *AsyncRequestResult) bool {
		total += len(result.GetResponseBody())
		return true
	})
	return total
}

// AsyncQueryContextStorageInMemory holds *AsyncQueryContext values in a
// concurrent map with string keys (ids, judging by the field name).
type AsyncQueryContextStorageInMemory struct {
idToContext *concurrent.Map[string, *AsyncQueryContext]
}
Expand Down
5 changes: 3 additions & 2 deletions quesma/quesma/async_search_storage/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import (

type AsyncRequestResultStorage interface {
Store(id string, result *AsyncRequestResult)
Range(func(key string, value *AsyncRequestResult) bool) // ideally I'd like to get rid of this, but not sure if it's possible
Load(id string) (*AsyncRequestResult, bool)
Delete(id string)
Size() int
DocCount() int
SizeInBytes() uint64
SizeInBytesLimit() uint64
}

// TODO: maybe merge those 2?
Expand Down
11 changes: 1 addition & 10 deletions quesma/quesma/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,15 +490,6 @@ func (q *QueryRunner) storeAsyncSearch(qmc *ui.QuesmaManagementConsole, id, asyn
return
}

func (q *QueryRunner) asyncQueriesCumulatedBodySize() int {
size := 0
q.AsyncRequestStorage.Range(func(key string, value *async_search_storage.AsyncRequestResult) bool {
size += len(value.GetResponseBody())
return true
})
return size
}

func (q *QueryRunner) handlePartialAsyncSearch(ctx context.Context, id string) ([]byte, error) {
if !strings.Contains(id, tracing.AsyncIdPrefix) {
logger.ErrorWithCtx(ctx).Msgf("non quesma async id: %v", id)
Expand Down Expand Up @@ -543,7 +534,7 @@ func (q *QueryRunner) deleteAsyncSearch(id string) ([]byte, error) {
}

func (q *QueryRunner) reachedQueriesLimit(ctx context.Context, asyncId string, doneCh chan<- asyncSearchWithError) bool {
if q.AsyncRequestStorage.Size() < asyncQueriesLimit && q.asyncQueriesCumulatedBodySize() < asyncQueriesLimitBytes {
if q.AsyncRequestStorage.DocCount() < asyncQueriesLimit && q.AsyncRequestStorage.SizeInBytes() < asyncQueriesLimitBytes {
return false
}
err := errors.New("too many async queries")
Expand Down
Loading