Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 7494707

Browse files
authored
Error handling improvement (#190)
Requirements: 1. Let the user know what is going on. User should be able to figure out the reason of failure by looking at the "Top Errors" list. 2. This list is also send via telemetry. We should be able to do the same by looking at kibana dashboard. Change: - Add `ErrorType` enumeration. Not sure about the name. - Add`EndUserError`. This error holds a message and details that can be share with the end user. - Add guessing why clickhouse query has failed - Add sending message from the `EndUserError` as response body, known error and reason. - Some error passing clean up. It will be continued.
1 parent f2e03fe commit 7494707

File tree

14 files changed

+208
-25
lines changed

14 files changed

+208
-25
lines changed

http_requests/search-type-logs.http

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
POST http://localhost:8080/type_logs/_search
1+
POST http://localhost:8080/device_logs/_search
22
Content-Type: application/json
33

44

quesma/clickhouse/clickhouse.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/ClickHouse/clickhouse-go/v2"
99
"mitmproxy/quesma/concurrent"
1010
"mitmproxy/quesma/elasticsearch"
11+
"mitmproxy/quesma/end_user_errors"
1112
"mitmproxy/quesma/index"
1213
"mitmproxy/quesma/jsonprocessor"
1314
"mitmproxy/quesma/logger"
@@ -73,7 +74,8 @@ func NewTableMap() *TableMap {
7374

7475
func (lm *LogManager) Start() {
7576
if err := lm.chDb.Ping(); err != nil {
76-
logger.Error().Msgf("could not connect to clickhouse. error: %v", err)
77+
endUserError := end_user_errors.GuessClickhouseErrorType(err)
78+
logger.ErrorWithCtxAndReason(lm.ctx, endUserError.Reason()).Msgf("could not connect to clickhouse. error: %v", endUserError)
7779
}
7880

7981
lm.schemaLoader.ReloadTables()
@@ -411,12 +413,13 @@ func (lm *LogManager) ProcessInsertQuery(ctx context.Context, tableName string,
411413
}
412414

413415
func (lm *LogManager) Insert(ctx context.Context, tableName string, jsons []types.JSON, config *ChTableConfig) error {
416+
414417
var jsonsReadyForInsertion []string
415418
for _, jsonValue := range jsons {
416419
preprocessedJson := preprocess(jsonValue, NestedSeparator)
417420
insertJson, err := lm.BuildInsertJson(tableName, preprocessedJson, config)
418421
if err != nil {
419-
logger.ErrorWithCtx(ctx).Msgf("error BuildInsertJson, tablename: %s\nerror: %v\njson:%s", tableName, err, PrettyJson(insertJson))
422+
return fmt.Errorf("error BuildInsertJson, tablename: '%s' json: '%s': %v", tableName, PrettyJson(insertJson), err)
420423
}
421424
jsonsReadyForInsertion = append(jsonsReadyForInsertion, insertJson)
422425
}
@@ -432,9 +435,7 @@ func (lm *LogManager) Insert(ctx context.Context, tableName string, jsons []type
432435
_, err := lm.chDb.ExecContext(ctx, insert)
433436
span.End(err)
434437
if err != nil {
435-
errorMsg := fmt.Sprintf("error [%s] on Insert, tablename: [%s]", err, tableName)
436-
logger.ErrorWithCtx(ctx).Msg(errorMsg)
437-
return fmt.Errorf(errorMsg)
438+
return end_user_errors.GuessClickhouseErrorType(err).InternalDetails("insert into table '%s' failed", tableName)
438439
} else {
439440
return nil
440441
}

quesma/clickhouse/quesma_communicator.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"database/sql"
66
"fmt"
77
"math/rand"
8+
"mitmproxy/quesma/end_user_errors"
89
"mitmproxy/quesma/logger"
910
"mitmproxy/quesma/model"
1011
"sort"
@@ -117,7 +118,7 @@ func executeQuery(ctx context.Context, lm *LogManager, queryAsString string, fie
117118
rows, err := lm.Query(ctx, queryAsString)
118119
if err != nil {
119120
span.End(err)
120-
return nil, fmt.Errorf("clickhouse: query failed. err: %v, query: %v", err, queryAsString)
121+
return nil, end_user_errors.GuessClickhouseErrorType(err).InternalDetails("clickhouse: query failed. err: %v, query: %v", err, queryAsString)
121122
}
122123

123124
res, err := read(rows, fields, rowToScan)

quesma/clickhouse/schema_loader.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package clickhouse
22

33
import (
4+
"context"
5+
"errors"
6+
"mitmproxy/quesma/end_user_errors"
47
"mitmproxy/quesma/logger"
58
"mitmproxy/quesma/quesma/config"
69
"mitmproxy/quesma/util"
@@ -23,7 +26,14 @@ func (sl *schemaLoader) ReloadTables() {
2326
databaseName = sl.cfg.ClickHouse.Database
2427
}
2528
if tables, err := sl.SchemaManagement.readTables(databaseName); err != nil {
26-
logger.Error().Msgf("could not describe tables: %v", err)
29+
30+
var endUserError *end_user_errors.EndUserError
31+
if errors.As(err, &endUserError) {
32+
logger.ErrorWithCtxAndReason(context.Background(), endUserError.Reason()).Msgf("could not describe tables: %v", err)
33+
} else {
34+
logger.Error().Msgf("could not describe tables: %v", err)
35+
}
36+
2737
return
2838
} else {
2939
for table, columns := range tables {

quesma/clickhouse/schema_management.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package clickhouse
22

33
import (
44
"database/sql"
5+
"mitmproxy/quesma/end_user_errors"
56
"mitmproxy/quesma/logger"
67
)
78

@@ -18,6 +19,7 @@ func (s *SchemaManagement) readTables(database string) (map[string]map[string]st
1819

1920
rows, err := s.chDb.Query("SELECT table, name, type FROM system.columns WHERE database = ?", database)
2021
if err != nil {
22+
err = end_user_errors.GuessClickhouseErrorType(err).InternalDetails("reading list of columns from system.columns")
2123
return map[string]map[string]string{}, err
2224
}
2325
defer rows.Close()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package end_user_errors
2+
3+
import (
4+
"errors"
5+
"strings"
6+
)
7+
8+
func GuessClickhouseErrorType(err error) *EndUserError {
9+
10+
// This limit a depth of error unwrapping.
11+
// We don't need forever loops especially in error handling branches
12+
const maxDepth = 100
13+
14+
for i := 0; i < maxDepth; i++ {
15+
s := err.Error()
16+
17+
// We should check the error type, not the string.
18+
// But Clickhouse doesn't provide a specific error type with details about the error.
19+
20+
if strings.Contains(s, "code: 60") {
21+
return ErrDatabaseTableNotFound.New(err)
22+
}
23+
24+
if strings.HasPrefix(s, "dial tcp") {
25+
return ErrDatabaseConnectionError.New(err)
26+
}
27+
28+
if strings.HasPrefix(s, "code: 516") {
29+
return ErrDatabaseAuthenticationError.New(err)
30+
}
31+
32+
err = errors.Unwrap(err)
33+
if err == nil {
34+
break
35+
}
36+
}
37+
38+
return ErrDatabaseOtherError.New(err)
39+
}

quesma/end_user_errors/end_user.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
package end_user_errors
2+
3+
import (
4+
"fmt"
5+
)
6+
7+
// EndUserError This represents an error that can be shown to the end user.
8+
type EndUserError struct {
9+
errorType *ErrorType
10+
details string
11+
internalDetails string
12+
originError error // this can be nil
13+
}
14+
15+
// Error this is the error interface implementation. This message is logged in the logs.
16+
func (e *EndUserError) Error() string {
17+
18+
details := ""
19+
20+
if e.internalDetails != "" {
21+
details = fmt.Sprintf("%s %s", details, e.internalDetails)
22+
}
23+
24+
if e.details != "" {
25+
details += e.details
26+
}
27+
28+
if e.originError == nil {
29+
return fmt.Sprintf("%s %s", e.errorType.String(), details)
30+
} else {
31+
return fmt.Sprintf("%s %s: %s", e.errorType.String(), details, e.originError)
32+
}
33+
}
34+
35+
// Reason returns message logged in to reason field
36+
func (e *EndUserError) Reason() string {
37+
return e.errorType.Message
38+
}
39+
40+
// EndUserErrorMessage returns the error message that can be shown to the end user.
41+
func (e *EndUserError) EndUserErrorMessage() string {
42+
return fmt.Sprintf("%s%s", e.errorType.String(), e.details)
43+
}
44+
45+
// Details sets details about the error. It will be available for end user.
46+
func (e *EndUserError) Details(format string, args ...any) *EndUserError {
47+
e.details = fmt.Sprintf(format, args...)
48+
return e
49+
}
50+
51+
// InternalDetails sets our internal details.
52+
func (e *EndUserError) InternalDetails(format string, args ...any) *EndUserError {
53+
e.internalDetails = fmt.Sprintf(format, args...)
54+
return e
55+
}
56+
57+
type ErrorType struct {
58+
Number int
59+
Message string
60+
}
61+
62+
func (t *ErrorType) String() string {
63+
return fmt.Sprintf("Q%04d: %s", t.Number, t.Message)
64+
}
65+
66+
// New create an error instance based on the error type.
67+
func (t *ErrorType) New(err error) *EndUserError {
68+
return &EndUserError{
69+
errorType: t,
70+
originError: err,
71+
}
72+
}
73+
74+
func errorType(number int, message string) *ErrorType {
75+
return &ErrorType{Number: number, Message: message}
76+
}
77+
78+
// Error type numbers follow the pattern QXXXX
79+
// Where
80+
// Q1XXX - Preprocessing errors (related to HTTP requests, JSON parsing, etc.)
81+
// Q2XXX - Query processing errors. Query translation etc.
82+
// Q3XXX - Errors related to external storages like Clickhouse, Elasticsearch, etc.
83+
// Q4XXX - Errors related to other internal components telemetry, etc.
84+
85+
var ErrSearchCondition = errorType(2001, "Not supported search condition.")
86+
var ErrNoSuchTable = errorType(2002, "Missing table.")
87+
88+
var ErrDatabaseTableNotFound = errorType(3001, "Table not found in database.")
89+
var ErrDatabaseFieldNotFound = errorType(3002, "Field not found in database.")
90+
var ErrDatabaseConnectionError = errorType(3003, "Error connecting to database.")
91+
var ErrDatabaseQueryError = errorType(3004, "Error executing query in database.")
92+
var ErrDatabaseAuthenticationError = errorType(3005, "Error authenticating with database.")
93+
var ErrDatabaseOtherError = errorType(3006, "Unspecified database error.")
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package end_user_errors
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"testing"
7+
)
8+
9+
func TestEndUserError_error_as(t *testing.T) {
10+
11+
err := ErrDatabaseAuthenticationError.New(fmt.Errorf("some error"))
12+
13+
var asEndUserError *EndUserError
14+
15+
if !errors.As(err, &asEndUserError) {
16+
t.Fatal("expected error to be of type *EndUserError")
17+
}
18+
}

quesma/quesma/quesma.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ package quesma
33
import (
44
"bytes"
55
"context"
6+
"errors"
67
"fmt"
78
"io"
89
"mitmproxy/quesma/clickhouse"
910
"mitmproxy/quesma/elasticsearch"
11+
"mitmproxy/quesma/end_user_errors"
1012
"mitmproxy/quesma/feature"
1113
"mitmproxy/quesma/logger"
1214
"mitmproxy/quesma/network"
@@ -207,11 +209,22 @@ func (r *router) reroute(ctx context.Context, w http.ResponseWriter, req *http.R
207209
r.failedRequests.Add(1)
208210

209211
if elkResponse != nil && r.config.Mode == config.DualWriteQueryClickhouseFallback {
210-
logger.ErrorWithCtx(ctx).Msgf("Error processing request while responding from Elastic: %v", err)
212+
logger.ErrorWithCtx(ctx).Msgf("quesma request failed: %v", err)
211213
responseFromElastic(ctx, elkResponse, w)
212214

213215
} else {
214-
logger.ErrorWithCtx(ctx).Msgf("Error processing request while responding from Quesma: %v", err)
216+
217+
msg := "Internal Quesma Error.\nPlease contact support if the problem persists."
218+
reason := "Failed request."
219+
220+
// if error is an error with user-friendly message, we should use it
221+
var endUserError *end_user_errors.EndUserError
222+
if errors.As(err, &endUserError) {
223+
msg = endUserError.EndUserErrorMessage()
224+
reason = endUserError.Reason()
225+
}
226+
227+
logger.ErrorWithCtxAndReason(ctx, reason).Msgf("quesma request failed: %v", err)
215228

216229
requestId := "n/a"
217230
if contextRid, ok := ctx.Value(tracing.RequestIdCtxKey).(string); ok {
@@ -220,7 +233,7 @@ func (r *router) reroute(ctx context.Context, w http.ResponseWriter, req *http.R
220233

221234
// We should not send our error message to the client. There can be sensitive information in it.
222235
// We will send ID of failed request instead
223-
responseFromQuesma(ctx, []byte(fmt.Sprintf("Internal server error. Request ID: %s\n", requestId)), w, elkResponse, mux.ServerErrorResult(), zip)
236+
responseFromQuesma(ctx, []byte(fmt.Sprintf("%s\nRequest ID: %s\n", msg, requestId)), w, elkResponse, mux.ServerErrorResult(), zip)
224237
}
225238
}
226239
} else {

quesma/quesma/router.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ func configureRouter(cfg config.QuesmaConfiguration, lm *clickhouse.LogManager,
7070
return nil, fmt.Errorf("invalid request body, expecting JSON . Got: %T", req.ParsedBody)
7171
}
7272

73-
dualWrite(ctx, req.Params["index"], body, lm, cfg)
74-
return indexDocResult(req.Params["index"], httpOk), nil
73+
err := dualWrite(ctx, req.Params["index"], body, lm, cfg)
74+
return indexDocResult(req.Params["index"], httpOk), err
7575
})
7676

7777
router.Register(routes.IndexBulkPath, and(method("POST", "PUT"), matchedExact(cfg)), func(ctx context.Context, req *mux.Request) (*mux.Result, error) {

0 commit comments

Comments
 (0)