Skip to content

Report telemetry errors also to logs #629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits on
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/api/internal/auth/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (a *commonAuthenticator[T]) Authenticate(ctx context.Context, input *openap
// Now, we need to get the API key from the request
headerKey, err := a.getHeaderKeysFromRequest(input.RequestValidationInput.Request)
if err != nil {
telemetry.ReportCriticalError(ctx, fmt.Errorf("%v %w", a.errorMessage, err))
telemetry.ReportCriticalError(ctx, a.errorMessage, err)

return fmt.Errorf("%v %w", a.errorMessage, err)
}
Expand All @@ -89,7 +89,7 @@ func (a *commonAuthenticator[T]) Authenticate(ctx context.Context, input *openap
result, validationError := a.validationFunction(ctx, headerKey)
if validationError != nil {
zap.L().Info("validation error", zap.Error(validationError.Err))
telemetry.ReportError(ctx, fmt.Errorf("%s %w", a.errorMessage, validationError.Err))
telemetry.ReportError(ctx, a.errorMessage, validationError.Err)

var forbiddenError *db.TeamForbiddenError
if errors.As(validationError.Err, &forbiddenError) {
Expand Down
2 changes: 1 addition & 1 deletion packages/api/internal/cache/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func (c *InstanceCache) Set(key string, value *InstanceInfo) {
go func() {
err := c.insertInstance(value)
if err != nil {
fmt.Printf("error inserting instance: %v", err)
zap.L().Error("error inserting instance", zap.Error(err))
}
}()
}
Expand Down
21 changes: 8 additions & 13 deletions packages/api/internal/handlers/accesstoken.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

errMsg := fmt.Errorf("error when parsing request: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing request", err)

return
}
Expand All @@ -34,8 +33,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when generating access token: %s", err))

errMsg := fmt.Errorf("error when generating access token: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when generating access token", err)

return
}
Expand All @@ -53,8 +51,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when creating access token: %s", err))

errMsg := fmt.Errorf("error when creating access token: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when creating access token", err)

return
}
Expand All @@ -64,9 +61,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) {
maskedToken, err := keys.GetMaskedIdentifierProperties(keys.AccessTokenPrefix, valueWithoutPrefix)
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when masking access token: %s", err))

errMsg := fmt.Errorf("error when masking access token: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when masking access token", err)

return
}
Expand Down Expand Up @@ -94,8 +89,8 @@ func (a *APIStore) DeleteAccessTokensAccessTokenID(c *gin.Context, accessTokenID
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing access token ID: %s", err))

errMsg := fmt.Errorf("error when parsing access token ID: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing access token ID", err)

return
}

Expand All @@ -108,8 +103,8 @@ func (a *APIStore) DeleteAccessTokensAccessTokenID(c *gin.Context, accessTokenID
} else if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when deleting access token: %s", err))

errMsg := fmt.Errorf("error when deleting access token: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when deleting access token", err)

return
}

Expand Down
6 changes: 2 additions & 4 deletions packages/api/internal/handlers/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ func (a *APIStore) PostNodesNodeID(c *gin.Context, nodeId api.NodeID) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

errMsg := fmt.Errorf("error when parsing request: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing request", err)

return
}
Expand All @@ -57,8 +56,7 @@ func (a *APIStore) PostNodesNodeID(c *gin.Context, nodeId api.NodeID) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when sending status change: %s", err))

errMsg := fmt.Errorf("error when sending status change: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when sending status change", err)
return
}

Expand Down
31 changes: 11 additions & 20 deletions packages/api/internal/handlers/apikey.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package handlers

import (
"fmt"
"log"
"net/http"
"strings"
"time"

"github.com/gin-gonic/gin"
"github.com/google/uuid"
"go.uber.org/zap"

"github.com/e2b-dev/infra/packages/api/internal/api"
"github.com/e2b-dev/infra/packages/api/internal/team"
Expand All @@ -26,17 +26,15 @@ func (a *APIStore) PatchApiKeysApiKeyID(c *gin.Context, apiKeyID string) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

errMsg := fmt.Errorf("error when parsing request: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing request", err)
return
}

apiKeyIDParsed, err := uuid.Parse(apiKeyID)
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing API key ID: %s", err))

errMsg := fmt.Errorf("error when parsing API key ID: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing API key ID", err)
return
}

Expand All @@ -47,8 +45,7 @@ func (a *APIStore) PatchApiKeysApiKeyID(c *gin.Context, apiKeyID string) {
} else if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when updating team API key name: %s", err))

errMsg := fmt.Errorf("error when updating team API key name: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when updating team API key name", err)
return
}

Expand All @@ -66,7 +63,7 @@ func (a *APIStore) GetApiKeys(c *gin.Context) {
WithCreator().
All(ctx)
if err != nil {
log.Println("Error when getting team API keys: ", err)
zap.L().Warn("error when getting team API keys", zap.Error(err))
c.String(http.StatusInternalServerError, "Error when getting team API keys")

return
Expand Down Expand Up @@ -115,8 +112,7 @@ func (a *APIStore) DeleteApiKeysApiKeyID(c *gin.Context, apiKeyID string) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing API key ID: %s", err))

errMsg := fmt.Errorf("error when parsing API key ID: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing API key ID", err)
return
}

Expand All @@ -127,8 +123,7 @@ func (a *APIStore) DeleteApiKeysApiKeyID(c *gin.Context, apiKeyID string) {
} else if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when deleting API key: %s", err))

errMsg := fmt.Errorf("error when deleting API key: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when deleting API key", err)
return
}

Expand All @@ -145,8 +140,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

errMsg := fmt.Errorf("error when parsing request: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing request", err)

return
}
Expand All @@ -155,8 +149,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when creating team API key: %s", err))

errMsg := fmt.Errorf("error when creating team API key: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when creating team API key", err)

return
}
Expand All @@ -165,8 +158,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting user: %s", err))

errMsg := fmt.Errorf("error when getting user: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when getting user", err)

return
}
Expand All @@ -177,8 +169,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when creating response key mask: %s", err))

errMsg := fmt.Errorf("error when masking response key for creating API key %d: %w", apiKey.ID, err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, fmt.Sprintf("error when masking response key for creating API key %d", apiKey.ID), err)

return
}
Expand Down
4 changes: 1 addition & 3 deletions packages/api/internal/handlers/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package handlers

import (
"context"
"fmt"
"net/http"
"time"

Expand Down Expand Up @@ -57,8 +56,7 @@ func (a *APIStore) startSandbox(
envdAccessToken,
)
if instanceErr != nil {
errMsg := fmt.Errorf("error when creating instance: %w", instanceErr.Err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when creating instance", instanceErr.Err)
return nil, instanceErr
}

Expand Down
8 changes: 3 additions & 5 deletions packages/api/internal/handlers/sandbox_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

errMsg := fmt.Errorf("error when parsing request: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when parsing request", err)

return
}
Expand All @@ -67,8 +66,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {
if err != nil {
a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid environment ID: %s", err))

errMsg := fmt.Errorf("error when cleaning env ID: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when cleaning env ID", err)

return
}
Expand All @@ -81,7 +79,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {
// Check if team has access to the environment
env, build, checkErr := a.templateCache.Get(ctx, cleanedAliasOrEnvID, teamInfo.Team.ID, true)
if checkErr != nil {
telemetry.ReportCriticalError(ctx, checkErr.Err)
telemetry.ReportCriticalError(ctx, "error when getting template", checkErr.Err)
a.sendAPIStoreError(c, checkErr.Code, checkErr.ClientMsg)
return
}
Expand Down
15 changes: 6 additions & 9 deletions packages/api/internal/handlers/sandbox_kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"go.opentelemetry.io/otel/attribute"
"go.uber.org/zap"

"github.com/e2b-dev/infra/packages/api/internal/auth"
authcache "github.com/e2b-dev/infra/packages/api/internal/cache/auth"
Expand Down Expand Up @@ -58,8 +57,7 @@ func (a *APIStore) deleteSnapshot(

deleteJobErr := a.templateManager.DeleteBuilds(deleteCtx, envBuildIDs)
if deleteJobErr != nil {
zap.L().Warn("Error deleting snapshot builds", zap.Error(deleteJobErr), zap.String("sandboxID", sandboxID))
telemetry.ReportError(deleteCtx, deleteJobErr)
telemetry.ReportError(deleteCtx, "error deleting snapshot builds", deleteJobErr, attribute.String("sandboxID", sandboxID))
}
}()

Expand Down Expand Up @@ -87,8 +85,7 @@ func (a *APIStore) DeleteSandboxesSandboxID(
sbx, err := a.orchestrator.GetSandbox(sandboxID)
if err == nil {
if *sbx.TeamID != teamID {
errMsg := fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String())
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "sandbox does not belong to team", fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String()))

a.sendAPIStoreError(c, http.StatusUnauthorized, fmt.Sprintf("Error deleting sandbox - sandbox '%s' does not belong to your team '%s'", sandboxID, teamID.String()))

Expand All @@ -98,7 +95,7 @@ func (a *APIStore) DeleteSandboxesSandboxID(
// remove running sandbox from the orchestrator
sandboxExists := a.orchestrator.DeleteInstance(ctx, sandboxID, false)
if !sandboxExists {
telemetry.ReportError(ctx, fmt.Errorf("sandbox '%s' not found", sandboxID))
telemetry.ReportError(ctx, "sandbox not found", fmt.Errorf("sandbox '%s' not found", sandboxID), attribute.String("sandboxID", sandboxID))
a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error deleting sandbox - sandbox '%s' was not found", sandboxID))

return
Expand All @@ -107,7 +104,7 @@ func (a *APIStore) DeleteSandboxesSandboxID(
// remove any snapshots of the sandbox
err := a.deleteSnapshot(ctx, sandboxID, teamID)
if err != nil && !errors.Is(err, db.EnvNotFound{}) {
telemetry.ReportError(ctx, err)
telemetry.ReportError(ctx, "error deleting sandbox", err)
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error deleting sandbox: %s", err))

return
Expand All @@ -123,14 +120,14 @@ func (a *APIStore) DeleteSandboxesSandboxID(
// remove any snapshots when the sandbox is not running
deleteSnapshotErr := a.deleteSnapshot(ctx, sandboxID, teamID)
if errors.Is(deleteSnapshotErr, db.EnvNotFound{}) {
telemetry.ReportError(ctx, fmt.Errorf("snapshot for sandbox '%s' not found", sandboxID))
telemetry.ReportError(ctx, "snapshot for sandbox not found", fmt.Errorf("snapshot for sandbox '%s' not found", sandboxID), attribute.String("sandboxID", sandboxID))
a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error deleting sandbox - sandbox '%s' not found", sandboxID))

return
}

if deleteSnapshotErr != nil {
telemetry.ReportError(ctx, deleteSnapshotErr)
telemetry.ReportError(ctx, "error deleting sandbox", deleteSnapshotErr)
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error deleting sandbox: %s", deleteSnapshotErr))

return
Expand Down
6 changes: 2 additions & 4 deletions packages/api/internal/handlers/sandbox_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ func (a *APIStore) GetSandboxesSandboxIDLogs(

res, err := a.lokiClient.QueryRange(query, int(*params.Limit), start, end, logproto.FORWARD, time.Duration(0), time.Duration(0), true)
if err != nil {
errMsg := fmt.Errorf("error when returning logs for sandbox: %w", err)
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "error when returning logs for sandbox", err)
a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error returning logs for sandbox '%s'", sandboxID))

return
Expand Down Expand Up @@ -87,8 +86,7 @@ func (a *APIStore) GetSandboxesSandboxIDLogs(
})

default:
errMsg := fmt.Errorf("unexpected value type %T", res.Data.Result.Type())
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "unexpected value type", fmt.Errorf("unexpected value type %T", res.Data.Result.Type()))
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error returning logs for sandbox '%s", sandboxID))

return
Expand Down
10 changes: 2 additions & 8 deletions packages/api/internal/handlers/sandbox_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/grafana/loki/pkg/loghttp"
"github.com/grafana/loki/pkg/logproto"
"go.opentelemetry.io/otel/attribute"
"go.uber.org/zap"

"github.com/e2b-dev/infra/packages/api/internal/api"
"github.com/e2b-dev/infra/packages/api/internal/auth"
Expand Down Expand Up @@ -70,8 +69,7 @@ func (a *APIStore) LegacyGetSandboxIDMetrics(

err := json.Unmarshal([]byte(entry.Line), &metric)
if err != nil {
zap.L().Error("Failed to unmarshal metric", zap.String("sandbox_id", sandboxID), zap.Error(err))
telemetry.ReportCriticalError(ctx, fmt.Errorf("failed to unmarshal metric: %w", err))
telemetry.ReportCriticalError(ctx, "failed to unmarshal metric", err, attribute.String("sandbox_id", sandboxID))

continue
}
Expand Down Expand Up @@ -174,11 +172,7 @@ func (a *APIStore) GetSandboxesSandboxIDMetrics(
metrics, err := a.readMetricsBasedOnConfig(ctx, sandboxID, teamID, a)

if err != nil {
zap.L().Error("Error returning metrics for sandbox",
zap.Error(err),
zap.String("sandboxID", sandboxID),
)
telemetry.ReportCriticalError(ctx, err)
telemetry.ReportCriticalError(ctx, "error returning metrics for sandbox", err, attribute.String("sandboxID", sandboxID))
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error returning metrics for sandbox '%s'", sandboxID))

return
Expand Down
3 changes: 1 addition & 2 deletions packages/api/internal/handlers/sandbox_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.San
}

if *sbx.TeamID != teamID {
errMsg := fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String())
telemetry.ReportCriticalError(ctx, errMsg)
telemetry.ReportCriticalError(ctx, "sandbox does not belong to team", fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String()))

a.sendAPIStoreError(c, http.StatusUnauthorized, fmt.Sprintf("Error pausing sandbox - sandbox '%s' does not belong to your team '%s'", sandboxID, teamID.String()))

Expand Down
Loading
Loading