diff --git a/packages/api/internal/auth/middleware.go b/packages/api/internal/auth/middleware.go index 2d775bb7f..d0d047bc3 100644 --- a/packages/api/internal/auth/middleware.go +++ b/packages/api/internal/auth/middleware.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "log" "net/http" "os" "strings" @@ -82,7 +81,7 @@ func (a *commonAuthenticator[T]) Authenticate(ctx context.Context, input *openap // Now, we need to get the API key from the request headerKey, err := a.getHeaderKeysFromRequest(input.RequestValidationInput.Request) if err != nil { - telemetry.ReportCriticalError(ctx, fmt.Errorf("%v %w", a.errorMessage, err)) + telemetry.ReportCriticalError(ctx, a.errorMessage, err) return fmt.Errorf("%v %w", a.errorMessage, err) } @@ -92,8 +91,7 @@ func (a *commonAuthenticator[T]) Authenticate(ctx context.Context, input *openap // If the API key is valid, we will get a result back result, validationError := a.validationFunction(ctx, headerKey) if validationError != nil { - log.Printf("validation error %v", validationError.Err) - telemetry.ReportError(ctx, fmt.Errorf("%s %w", a.errorMessage, validationError.Err)) + telemetry.ReportError(ctx, a.errorMessage, validationError.Err) return fmt.Errorf(a.errorMessage) } diff --git a/packages/api/internal/cache/instance/instance.go b/packages/api/internal/cache/instance/instance.go index 6b3d0bfc6..deb8e8616 100644 --- a/packages/api/internal/cache/instance/instance.go +++ b/packages/api/internal/cache/instance/instance.go @@ -197,7 +197,7 @@ func (c *InstanceCache) Set(key string, value *InstanceInfo) { go func() { err := c.insertInstance(value) if err != nil { - fmt.Printf("error inserting instance: %v", err) + zap.L().Error("error inserting instance", zap.Error(err)) } }() } diff --git a/packages/api/internal/handlers/accesstoken.go b/packages/api/internal/handlers/accesstoken.go index 886dcd66c..a10232b5b 100644 --- a/packages/api/internal/handlers/accesstoken.go +++ b/packages/api/internal/handlers/accesstoken.go 
@@ -23,8 +23,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -33,8 +32,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when generating access token: %s", err)) - errMsg := fmt.Errorf("error when generating access token: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when generating access token", err) return } @@ -52,8 +50,7 @@ func (a *APIStore) PostAccessTokens(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when creating access token: %s", err)) - errMsg := fmt.Errorf("error when creating access token: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when creating access token", err) return } @@ -76,8 +73,8 @@ func (a *APIStore) DeleteAccessTokensAccessTokenID(c *gin.Context, accessTokenID if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing access token ID: %s", err)) - errMsg := fmt.Errorf("error when parsing access token ID: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing access token ID", err) + return } @@ -90,8 +87,8 @@ func (a *APIStore) DeleteAccessTokensAccessTokenID(c *gin.Context, accessTokenID } else if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when deleting access token: %s", err)) - errMsg := fmt.Errorf("error when deleting access token: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, 
"error when deleting access token", err) + return } diff --git a/packages/api/internal/handlers/admin.go b/packages/api/internal/handlers/admin.go index 32ea29f58..80bee9390 100644 --- a/packages/api/internal/handlers/admin.go +++ b/packages/api/internal/handlers/admin.go @@ -41,8 +41,7 @@ func (a *APIStore) PostNodesNodeID(c *gin.Context, nodeId api.NodeID) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } diff --git a/packages/api/internal/handlers/apikey.go b/packages/api/internal/handlers/apikey.go index c6f99db5f..cfd188e68 100644 --- a/packages/api/internal/handlers/apikey.go +++ b/packages/api/internal/handlers/apikey.go @@ -2,13 +2,13 @@ package handlers import ( "fmt" - "log" "net/http" "strings" "time" "github.com/gin-gonic/gin" "github.com/google/uuid" + "go.uber.org/zap" "github.com/e2b-dev/infra/packages/api/internal/api" "github.com/e2b-dev/infra/packages/api/internal/team" @@ -26,8 +26,7 @@ func (a *APIStore) PatchApiKeysApiKeyID(c *gin.Context, apiKeyID string) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -35,8 +34,7 @@ func (a *APIStore) PatchApiKeysApiKeyID(c *gin.Context, apiKeyID string) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing API key ID: %s", err)) - errMsg := fmt.Errorf("error when parsing API key ID: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing API key ID", err) return } @@ -47,8 +45,7 @@ func (a *APIStore) 
PatchApiKeysApiKeyID(c *gin.Context, apiKeyID string) { } else if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when updating team API key name: %s", err)) - errMsg := fmt.Errorf("error when updating team API key name: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when updating team API key name", err) return } @@ -66,7 +63,7 @@ func (a *APIStore) GetApiKeys(c *gin.Context) { WithCreator(). All(ctx) if err != nil { - log.Println("Error when getting team API keys: ", err) + zap.L().Warn("error when getting team API keys", zap.Error(err)) c.String(http.StatusInternalServerError, "Error when getting team API keys") return @@ -110,8 +107,7 @@ func (a *APIStore) DeleteApiKeysApiKeyID(c *gin.Context, apiKeyID string) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing API key ID: %s", err)) - errMsg := fmt.Errorf("error when parsing API key ID: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing API key ID", err) return } @@ -122,8 +118,7 @@ func (a *APIStore) DeleteApiKeysApiKeyID(c *gin.Context, apiKeyID string) { } else if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when deleting API key: %s", err)) - errMsg := fmt.Errorf("error when deleting API key: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when deleting API key", err) return } @@ -140,8 +135,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -150,8 +144,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) { if err != nil 
{ a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when creating team API key: %s", err)) - errMsg := fmt.Errorf("error when creating team API key: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when creating team API key", err) return } @@ -160,8 +153,7 @@ func (a *APIStore) PostApiKeys(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting user: %s", err)) - errMsg := fmt.Errorf("error when getting user: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when getting user", err) return } diff --git a/packages/api/internal/handlers/sandbox.go b/packages/api/internal/handlers/sandbox.go index 01bc8c626..74759b825 100644 --- a/packages/api/internal/handlers/sandbox.go +++ b/packages/api/internal/handlers/sandbox.go @@ -2,7 +2,6 @@ package handlers import ( "context" - "fmt" "net/http" "time" @@ -53,8 +52,7 @@ func (a *APIStore) startSandbox( envdAccessToken, ) if instanceErr != nil { - errMsg := fmt.Errorf("error when creating instance: %w", instanceErr.Err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when creating instance", instanceErr.Err) return nil, instanceErr } diff --git a/packages/api/internal/handlers/sandbox_create.go b/packages/api/internal/handlers/sandbox_create.go index 0270b21db..30f16f307 100644 --- a/packages/api/internal/handlers/sandbox_create.go +++ b/packages/api/internal/handlers/sandbox_create.go @@ -55,8 +55,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -67,8 +66,7 @@ func (a *APIStore) 
PostSandboxes(c *gin.Context) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid environment ID: %s", err)) - errMsg := fmt.Errorf("error when cleaning env ID: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when cleaning env ID", err) return } @@ -81,7 +79,7 @@ func (a *APIStore) PostSandboxes(c *gin.Context) { // Check if team has access to the environment env, build, checkErr := a.templateCache.Get(ctx, cleanedAliasOrEnvID, teamInfo.Team.ID, true) if checkErr != nil { - telemetry.ReportCriticalError(ctx, checkErr.Err) + telemetry.ReportCriticalError(ctx, "error when getting template", checkErr.Err) a.sendAPIStoreError(c, checkErr.Code, checkErr.ClientMsg) return } diff --git a/packages/api/internal/handlers/sandbox_kill.go b/packages/api/internal/handlers/sandbox_kill.go index 13a857803..41c1a90e1 100644 --- a/packages/api/internal/handlers/sandbox_kill.go +++ b/packages/api/internal/handlers/sandbox_kill.go @@ -66,8 +66,7 @@ func (a *APIStore) DeleteSandboxesSandboxID( sbx, err := a.orchestrator.GetSandbox(sandboxID) if err == nil { if *sbx.TeamID != teamID { - errMsg := fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String()) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "sandbox does not belong to team", fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String())) a.sendAPIStoreError(c, http.StatusUnauthorized, fmt.Sprintf("Error deleting sandbox - sandbox '%s' does not belong to your team '%s'", sandboxID, teamID.String())) diff --git a/packages/api/internal/handlers/sandbox_logs.go b/packages/api/internal/handlers/sandbox_logs.go index 7c21e1d7e..52f26749c 100644 --- a/packages/api/internal/handlers/sandbox_logs.go +++ b/packages/api/internal/handlers/sandbox_logs.go @@ -55,8 +55,7 @@ func (a *APIStore) GetSandboxesSandboxIDLogs( res, err := a.lokiClient.QueryRange(query, 
int(*params.Limit), start, end, logproto.FORWARD, time.Duration(0), time.Duration(0), true) if err != nil { - errMsg := fmt.Errorf("error when returning logs for sandbox: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when returning logs for sandbox", err) a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error returning logs for sandbox '%s'", sandboxID)) return @@ -87,8 +86,7 @@ func (a *APIStore) GetSandboxesSandboxIDLogs( }) default: - errMsg := fmt.Errorf("unexpected value type %T", res.Data.Result.Type()) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "unexpected value type", fmt.Errorf("unexpected value type %T", res.Data.Result.Type())) a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error returning logs for sandbox '%s", sandboxID)) return diff --git a/packages/api/internal/handlers/sandbox_metrics.go b/packages/api/internal/handlers/sandbox_metrics.go index 0e696e025..072b32f3f 100644 --- a/packages/api/internal/handlers/sandbox_metrics.go +++ b/packages/api/internal/handlers/sandbox_metrics.go @@ -13,7 +13,6 @@ import ( "github.com/grafana/loki/pkg/loghttp" "github.com/grafana/loki/pkg/logproto" "go.opentelemetry.io/otel/attribute" - "go.uber.org/zap" "github.com/e2b-dev/infra/packages/api/internal/api" "github.com/e2b-dev/infra/packages/api/internal/auth" @@ -70,8 +69,7 @@ func (a *APIStore) LegacyGetSandboxIDMetrics( err := json.Unmarshal([]byte(entry.Line), &metric) if err != nil { - zap.L().Error("Failed to unmarshal metric", zap.String("sandbox_id", sandboxID), zap.Error(err)) - telemetry.ReportCriticalError(ctx, fmt.Errorf("failed to unmarshal metric: %w", err)) + telemetry.ReportCriticalError(ctx, "failed to unmarshal metric", err, attribute.String("sandbox_id", sandboxID)) continue } @@ -174,11 +172,7 @@ func (a *APIStore) GetSandboxesSandboxIDMetrics( metrics, err := a.readMetricsBasedOnConfig(ctx, sandboxID, teamID, a) if err != nil 
{ - zap.L().Error("Error returning metrics for sandbox", - zap.Error(err), - zap.String("sandboxID", sandboxID), - ) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error returning metrics for sandbox", err, attribute.String("sandboxID", sandboxID)) a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error returning metrics for sandbox '%s'", sandboxID)) return diff --git a/packages/api/internal/handlers/sandbox_pause.go b/packages/api/internal/handlers/sandbox_pause.go index 019d5b512..007837db9 100644 --- a/packages/api/internal/handlers/sandbox_pause.go +++ b/packages/api/internal/handlers/sandbox_pause.go @@ -50,8 +50,7 @@ func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.San } if *sbx.TeamID != teamID { - errMsg := fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String()) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "sandbox does not belong to team", fmt.Errorf("sandbox '%s' does not belong to team '%s'", sandboxID, teamID.String())) a.sendAPIStoreError(c, http.StatusUnauthorized, fmt.Sprintf("Error pausing sandbox - sandbox '%s' does not belong to your team '%s'", sandboxID, teamID.String())) diff --git a/packages/api/internal/handlers/sandbox_refresh.go b/packages/api/internal/handlers/sandbox_refresh.go index b15a6b8b3..9e5e9b897 100644 --- a/packages/api/internal/handlers/sandbox_refresh.go +++ b/packages/api/internal/handlers/sandbox_refresh.go @@ -26,8 +26,7 @@ func (a *APIStore) PostSandboxesSandboxIDRefreshes( if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -44,7 +43,7 @@ func (a *APIStore) PostSandboxesSandboxIDRefreshes( apiErr := a.orchestrator.KeepAliveFor(ctx, 
sandboxID, duration, false) if apiErr != nil { - telemetry.ReportCriticalError(ctx, apiErr.Err) + telemetry.ReportCriticalError(ctx, "error when refreshing sandbox", apiErr.Err) a.sendAPIStoreError(c, apiErr.Code, apiErr.ClientMsg) return diff --git a/packages/api/internal/handlers/sandbox_resume.go b/packages/api/internal/handlers/sandbox_resume.go index 09e885f41..b4679e6dd 100644 --- a/packages/api/internal/handlers/sandbox_resume.go +++ b/packages/api/internal/handlers/sandbox_resume.go @@ -47,8 +47,7 @@ func (a *APIStore) PostSandboxesSandboxIDResume(c *gin.Context, sandboxID api.Sa if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } diff --git a/packages/api/internal/handlers/sandbox_timeout.go b/packages/api/internal/handlers/sandbox_timeout.go index f3beace7a..a28f4ffea 100644 --- a/packages/api/internal/handlers/sandbox_timeout.go +++ b/packages/api/internal/handlers/sandbox_timeout.go @@ -25,8 +25,7 @@ func (a *APIStore) PostSandboxesSandboxIDTimeout( if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) - errMsg := fmt.Errorf("error when parsing request: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) return } @@ -39,7 +38,7 @@ func (a *APIStore) PostSandboxesSandboxIDTimeout( apiErr := a.orchestrator.KeepAliveFor(ctx, sandboxID, duration, true) if apiErr != nil { - telemetry.ReportCriticalError(ctx, apiErr.Err) + telemetry.ReportCriticalError(ctx, "error when setting timeout", apiErr.Err) a.sendAPIStoreError(c, apiErr.Code, apiErr.ClientMsg) return diff --git a/packages/api/internal/handlers/sandboxes_list_metrics.go 
b/packages/api/internal/handlers/sandboxes_list_metrics.go index 862a5d6ac..1cbad4fe8 100644 --- a/packages/api/internal/handlers/sandboxes_list_metrics.go +++ b/packages/api/internal/handlers/sandboxes_list_metrics.go @@ -205,8 +205,7 @@ func (a *APIStore) GetSandboxesMetrics(c *gin.Context, params api.GetSandboxesMe sandboxesWithMetrics, err := a.getSandboxesMetrics(ctx, team.ID, sandboxes) if err != nil { - zap.L().Error("Error fetching metrics for sandboxes", zap.Error(err)) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error fetching metrics for sandboxes", err) a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error returning metrics for sandboxes for team '%s'", team.ID)) return diff --git a/packages/api/internal/handlers/teams.go b/packages/api/internal/handlers/teams.go index 67086caae..9cfbb8a05 100644 --- a/packages/api/internal/handlers/teams.go +++ b/packages/api/internal/handlers/teams.go @@ -1,10 +1,10 @@ package handlers import ( - "log" "net/http" "github.com/gin-gonic/gin" + "go.uber.org/zap" "github.com/e2b-dev/infra/packages/api/internal/api" "github.com/e2b-dev/infra/packages/api/internal/team" @@ -17,7 +17,7 @@ func (a *APIStore) GetTeams(c *gin.Context) { results, err := a.sqlcDB.GetTeamsWithUsersTeams(ctx, userID) if err != nil { - log.Println("Error when starting transaction: ", err) + zap.L().Error("error when starting transaction", zap.Error(err)) c.JSON(http.StatusInternalServerError, "Error when starting transaction") return @@ -28,7 +28,7 @@ func (a *APIStore) GetTeams(c *gin.Context) { apiKey, err := team.CreateAPIKey(ctx, a.db, row.Team.ID, userID, "CLI login/configure") if err != nil { - log.Println("Error when creating API key: ", err) + zap.L().Error("error when creating API key", zap.Error(err)) c.JSON(http.StatusInternalServerError, "Error when creating API key") return diff --git a/packages/api/internal/handlers/template_build_status.go 
b/packages/api/internal/handlers/template_build_status.go index feab5cf1d..18a2fdf20 100644 --- a/packages/api/internal/handlers/template_build_status.go +++ b/packages/api/internal/handlers/template_build_status.go @@ -34,11 +34,9 @@ func (a *APIStore) GetTemplatesTemplateIDBuildsBuildIDStatus(c *gin.Context, tem userID := c.Value(auth.UserIDContextKey).(uuid.UUID) teams, err := a.db.GetTeams(ctx, userID) if err != nil { - errMsg := fmt.Errorf("error when getting teams: %w", err) - a.sendAPIStoreError(c, http.StatusInternalServerError, "Failed to get the default team") - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when getting teams", err) return } diff --git a/packages/api/internal/handlers/template_delete.go b/packages/api/internal/handlers/template_delete.go index 474ba7dbb..a86ac0c70 100644 --- a/packages/api/internal/handlers/template_delete.go +++ b/packages/api/internal/handlers/template_delete.go @@ -25,8 +25,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid env ID: %s", aliasOrTemplateID)) - err = fmt.Errorf("invalid env ID: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "invalid env ID", err) return } @@ -36,8 +35,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting default team: %s", err)) - err = fmt.Errorf("error when getting default team: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting default team", err) return } @@ -110,8 +108,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a dbErr := a.db.DeleteEnv(ctx, template.ID) if dbErr != nil { - errMsg := fmt.Errorf("error when deleting env from db: %w", dbErr) - 
telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when deleting env from db", dbErr) a.sendAPIStoreError(c, http.StatusInternalServerError, "Error when deleting env") @@ -127,8 +124,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a // delete all builds deleteJobErr := a.templateManager.DeleteBuilds(ctx, buildIds) if deleteJobErr != nil { - errMsg := fmt.Errorf("error when deleting env files from storage: %w", deleteJobErr) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when deleting env files from storage", deleteJobErr) } else { telemetry.ReportEvent(ctx, "deleted env from storage") } diff --git a/packages/api/internal/handlers/template_request_build.go b/packages/api/internal/handlers/template_request_build.go index aba791c4e..8d9fa17d6 100644 --- a/packages/api/internal/handlers/template_request_build.go +++ b/packages/api/internal/handlers/template_request_build.go @@ -39,8 +39,7 @@ func (a *APIStore) PostTemplatesTemplateID(c *gin.Context, templateID api.Templa if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid template ID: %s", cleanedTemplateID)) - err = fmt.Errorf("invalid template ID: %w", err) - telemetry.ReportCriticalError(c.Request.Context(), err) + telemetry.ReportCriticalError(c.Request.Context(), "invalid template ID", err) return } @@ -65,8 +64,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting user: %s", err)) - err = fmt.Errorf("error when getting default team: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting user", err) return nil } @@ -76,8 +74,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, 
fmt.Sprintf("Invalid team ID: %s", *body.TeamID)) - err = fmt.Errorf("invalid team ID: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "invalid team ID", err) return nil } @@ -92,8 +89,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if team == nil { a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Team '%s' not found", *body.TeamID)) - err = fmt.Errorf("team not found: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "team not found", err) return nil } @@ -108,8 +104,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if team == nil { a.sendAPIStoreError(c, http.StatusInternalServerError, "Default team not found") - err = fmt.Errorf("default team not found: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "default team not found", err) return nil } @@ -119,11 +114,9 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI // Check if the user has access to the template _, err = a.db.Client.Env.Query().Where(env.ID(templateID), env.TeamID(team.ID)).Only(ctx) if err != nil { - errMsg := fmt.Sprintf("Error when getting template '%s' for team '%s'", templateID, team.ID.String()) - a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("%s: %s", errMsg, err)) + a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error when getting template '%s' for team '%s'", templateID, team.ID.String())) - err = fmt.Errorf("%s: %w", errMsg, err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting template", err, attribute.String("template_id", templateID), attribute.String("team_id", team.ID.String())) return nil } @@ -132,8 +125,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI // Generate a build id for the new build buildID, err := uuid.NewRandom() if err != nil { - err 
= fmt.Errorf("error when generating build id: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when generating build id", err) a.sendAPIStoreError(c, http.StatusInternalServerError, "Failed to generate build id") @@ -167,7 +159,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI cpuCount, ramMB, apiError := getCPUAndRAM(team.Tier, body.CpuCount, body.MemoryMB) if apiError != nil { - telemetry.ReportCriticalError(ctx, apiError.Err) + telemetry.ReportCriticalError(ctx, "error when getting CPU and RAM", apiError.Err) a.sendAPIStoreError(c, apiError.Code, apiError.ClientMsg) return nil @@ -179,8 +171,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid alias: %s", alias)) - err = fmt.Errorf("invalid alias: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "invalid alias", err) return nil } @@ -191,8 +182,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when starting transaction: %s", err)) - err = fmt.Errorf("error when starting transaction: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when starting transaction", err) return nil } @@ -215,8 +205,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when updating template: %s", err)) - err = fmt.Errorf("error when updating env: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when updating env", err) return nil } @@ -229,8 +218,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { 
a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when updating template: %s", err)) - err = fmt.Errorf("error when updating env: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when updating env", err) return nil } @@ -259,8 +247,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when querying alias '%s': %s", alias, err)) - err = fmt.Errorf("error when checking alias '%s': %w", alias, err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when checking alias", err, attribute.String("alias", alias)) return nil } @@ -268,8 +255,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if len(envs) > 0 { a.sendAPIStoreError(c, http.StatusConflict, fmt.Sprintf("Alias '%s' is already used", alias)) - err = fmt.Errorf("conflict of alias '%s' with template ID: %w", alias, err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "conflict of alias", err, attribute.String("alias", alias)) return nil } @@ -279,8 +265,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if !models.IsNotFound(err) { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when querying for alias: %s", err)) - err = fmt.Errorf("error when checking alias: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when checking alias", err, attribute.String("alias", alias)) return nil @@ -290,8 +275,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when deleting template alias: %s", err)) - err = fmt.Errorf("error when deleting template alias: %w", err) - telemetry.ReportCriticalError(ctx, err) + 
telemetry.ReportCriticalError(ctx, "error when deleting template alias", err, attribute.String("alias", alias)) return nil } @@ -308,8 +292,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when inserting alias '%s': %s", alias, err)) - err = fmt.Errorf("error when inserting alias '%s': %w", alias, err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when inserting alias", err, attribute.String("alias", alias)) return nil @@ -317,8 +300,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI } else if aliasDB.EnvID != templateID { a.sendAPIStoreError(c, http.StatusForbidden, fmt.Sprintf("Alias '%s' already used", alias)) - err = fmt.Errorf("alias '%s' already used: %w", alias, err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "alias already used", err, attribute.String("alias", alias)) return nil } @@ -331,8 +313,7 @@ func (a *APIStore) TemplateRequestBuild(c *gin.Context, templateID api.TemplateI if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when committing transaction: %s", err)) - err = fmt.Errorf("error when committing transaction: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when committing transaction", err) return nil } diff --git a/packages/api/internal/handlers/template_start_build.go b/packages/api/internal/handlers/template_start_build.go index cd447374f..8c9639f53 100644 --- a/packages/api/internal/handlers/template_start_build.go +++ b/packages/api/internal/handlers/template_start_build.go @@ -29,8 +29,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid build ID: %s", buildID)) - err = fmt.Errorf("invalid build ID: %w", 
err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "invalid build ID", err) return } @@ -39,8 +38,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting default team: %s", err)) - err = fmt.Errorf("error when getting default team: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting default team", err) return } @@ -58,8 +56,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template if err != nil { a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("Error when getting template: %s", err)) - err = fmt.Errorf("error when getting env: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting env", err, attribute.String("template_id", templateID)) return } @@ -76,8 +73,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template if team == nil { a.sendAPIStoreError(c, http.StatusForbidden, fmt.Sprintf("User does not have access to the template")) - err = fmt.Errorf("user '%s' does not have access to the template '%s'", userID, templateID) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "user does not have access to the template", err, attribute.String("template_id", templateID)) return } @@ -107,8 +103,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template // make sure there is no other build in progress for the same template if concurrentlyRunningBuilds > 0 { a.sendAPIStoreError(c, http.StatusConflict, fmt.Sprintf("There is already a build in progress for the template")) - err = fmt.Errorf("there is already a build in progress for the template '%s'", templateID) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "there is already a build in progress for 
the template", err, attribute.String("template_id", templateID)) return } @@ -122,8 +117,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template // only waiting builds can be triggered if build.Status != envbuild.StatusWaiting { a.sendAPIStoreError(c, http.StatusBadRequest, "build is not in waiting state") - err = fmt.Errorf("build is not in waiting state: %s", build.Status) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "build is not in waiting state", fmt.Errorf("build is not in waiting state: %s", build.Status), attribute.String("template_id", templateID)) return } @@ -142,9 +136,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template ) if buildErr != nil { - buildErr = fmt.Errorf("error when building env: %w", buildErr) - zap.L().Error("build failed", zap.Error(buildErr)) - telemetry.ReportCriticalError(ctx, buildErr) + telemetry.ReportCriticalError(ctx, "build failed", buildErr, attribute.String("template_id", templateID)) err = a.templateManager.SetStatus( ctx, @@ -154,7 +146,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template fmt.Sprintf("error when building env: %s", buildErr), ) if err != nil { - telemetry.ReportCriticalError(ctx, fmt.Errorf("error when setting build status: %w", err)) + telemetry.ReportCriticalError(ctx, "error when setting build status", err) } return @@ -170,7 +162,7 @@ func (a *APIStore) PostTemplatesTemplateIDBuildsBuildID(c *gin.Context, template "starting build", ) if err != nil { - telemetry.ReportCriticalError(ctx, fmt.Errorf("error when setting build status: %w", err)) + telemetry.ReportCriticalError(ctx, "error when setting build status", err) return } diff --git a/packages/api/internal/handlers/template_update.go b/packages/api/internal/handlers/template_update.go index 43d6a7412..bee5b5f4d 100644 --- a/packages/api/internal/handlers/template_update.go +++ 
b/packages/api/internal/handlers/template_update.go @@ -33,8 +33,7 @@ func (a *APIStore) PatchTemplatesTemplateID(c *gin.Context, aliasOrTemplateID ap if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid env ID: %s", aliasOrTemplateID)) - err = fmt.Errorf("invalid env ID: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "invalid env ID", err) return } @@ -44,8 +43,7 @@ func (a *APIStore) PatchTemplatesTemplateID(c *gin.Context, aliasOrTemplateID ap if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting default team: %s", err)) - err = fmt.Errorf("error when getting default team: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting default team", err) return } @@ -63,12 +61,13 @@ func (a *APIStore) PatchTemplatesTemplateID(c *gin.Context, aliasOrTemplateID ap notFound := models.IsNotFound(err) if notFound { - telemetry.ReportError(ctx, fmt.Errorf("template '%s' not found", aliasOrTemplateID)) + telemetry.ReportError(ctx, "template not found", fmt.Errorf("template '%s' not found", aliasOrTemplateID)) a.sendAPIStoreError(c, http.StatusNotFound, fmt.Sprintf("the sandbox template '%s' wasn't found", cleanedAliasOrEnvID)) return } else if err != nil { - telemetry.ReportError(ctx, fmt.Errorf("failed to get env '%s': %w", aliasOrTemplateID, err)) + telemetry.ReportError(ctx, "failed to get env", err, attribute.String("env_id", aliasOrTemplateID)) + a.sendAPIStoreError(c, http.StatusInternalServerError, "Error when getting env") return @@ -83,8 +82,7 @@ func (a *APIStore) PatchTemplatesTemplateID(c *gin.Context, aliasOrTemplateID ap } if team == nil { - errMsg := fmt.Errorf("user '%s' doesn't have access to the sandbox template '%s'", userID, cleanedAliasOrEnvID) - telemetry.ReportError(ctx, errMsg) + telemetry.ReportError(ctx, "user doesn't have access to the sandbox template", fmt.Errorf("user '%s' 
doesn't have access to the sandbox template '%s'", userID, cleanedAliasOrEnvID)) a.sendAPIStoreError(c, http.StatusForbidden, fmt.Sprintf("You (%s) don't have access to sandbox template '%s'", userID, cleanedAliasOrEnvID)) @@ -98,8 +96,7 @@ func (a *APIStore) PatchTemplatesTemplateID(c *gin.Context, aliasOrTemplateID ap }) if dbErr != nil { - errMsg := fmt.Errorf("error when updating env: %w", dbErr) - telemetry.ReportError(ctx, errMsg) + telemetry.ReportError(ctx, "error when updating env", dbErr) a.sendAPIStoreError(c, http.StatusInternalServerError, "Error when updating env") return diff --git a/packages/api/internal/handlers/templates_list.go b/packages/api/internal/handlers/templates_list.go index 4c8552fa2..33aa75c6a 100644 --- a/packages/api/internal/handlers/templates_list.go +++ b/packages/api/internal/handlers/templates_list.go @@ -25,8 +25,7 @@ func (a *APIStore) GetTemplates(c *gin.Context, params api.GetTemplatesParams) { if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when getting teams")) - err = fmt.Errorf("error when getting teams: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting teams", err) return } @@ -36,7 +35,7 @@ func (a *APIStore) GetTemplates(c *gin.Context, params api.GetTemplatesParams) { if err != nil { a.sendAPIStoreError(c, http.StatusBadRequest, "Invalid team ID") - telemetry.ReportError(ctx, err) + telemetry.ReportError(ctx, "invalid team ID", err) return } @@ -51,7 +50,7 @@ func (a *APIStore) GetTemplates(c *gin.Context, params api.GetTemplatesParams) { if team == nil { a.sendAPIStoreError(c, http.StatusNotFound, "Team not found") - telemetry.ReportError(ctx, fmt.Errorf("team not found")) + telemetry.ReportError(ctx, "team not found", err) return } @@ -66,7 +65,7 @@ func (a *APIStore) GetTemplates(c *gin.Context, params api.GetTemplatesParams) { if team == nil { a.sendAPIStoreError(c, http.StatusInternalServerError, "Default team 
not found") - telemetry.ReportError(ctx, fmt.Errorf("default team not found")) + telemetry.ReportError(ctx, "default team not found", err) return } @@ -81,8 +80,7 @@ func (a *APIStore) GetTemplates(c *gin.Context, params api.GetTemplatesParams) { if err != nil { a.sendAPIStoreError(c, http.StatusInternalServerError, "Error when getting sandbox templates") - err = fmt.Errorf("error when getting envs: %w", err) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error when getting envs", err) return } diff --git a/packages/api/internal/orchestrator/create_instance.go b/packages/api/internal/orchestrator/create_instance.go index 8c8f00894..61ce6aa85 100644 --- a/packages/api/internal/orchestrator/create_instance.go +++ b/packages/api/internal/orchestrator/create_instance.go @@ -4,11 +4,11 @@ import ( "context" _ "embed" "fmt" - "log" "net/http" "time" "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" "google.golang.org/protobuf/types/known/timestamppb" "github.com/e2b-dev/infra/packages/api/internal/api" @@ -55,15 +55,14 @@ func (o *Orchestrator) CreateSandbox( // Check if team has reached max instances err, releaseTeamSandboxReservation := o.instanceCache.Reserve(sandboxID, team.Team.ID, team.Tier.ConcurrentInstances) if err != nil { - errMsg := fmt.Errorf("team '%s' has reached the maximum number of instances (%d)", team.Team.ID, team.Tier.ConcurrentInstances) - telemetry.ReportCriticalError(ctx, fmt.Errorf("%w (error: %w)", errMsg, err)) + telemetry.ReportCriticalError(ctx, "team has reached the maximum number of instances", err, attribute.Int64("concurrent_instances", team.Tier.ConcurrentInstances)) return nil, &api.APIError{ Code: http.StatusTooManyRequests, ClientMsg: fmt.Sprintf( "you have reached the maximum number of concurrent E2B sandboxes (%d). 
If you need more, "+ "please contact us at 'https://e2b.dev/docs/getting-help'", team.Tier.ConcurrentInstances), - Err: errMsg, + Err: fmt.Errorf("team '%s' has reached the maximum number of instances (%d)", team.Team.ID, team.Tier.ConcurrentInstances), } } @@ -144,13 +143,12 @@ func (o *Orchestrator) CreateSandbox( if node == nil { node, err = o.getLeastBusyNode(childCtx, nodesExcluded) if err != nil { - errMsg := fmt.Errorf("failed to get least busy node: %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "failed to get least busy node", err) return nil, &api.APIError{ Code: http.StatusInternalServerError, ClientMsg: "Failed to get node to place sandbox on.", - Err: errMsg, + Err: fmt.Errorf("failed to get least busy node: %w", err), } } } @@ -170,7 +168,12 @@ func (o *Orchestrator) CreateSandbox( node.sbxsInProgress.Remove(sandboxID) - log.Printf("failed to create sandbox '%s' on node '%s', attempt #%d: %v", sandboxID, node.Info.ID, attempt, utils.UnwrapGRPCError(err)) + zap.L().Warn("failed to create sandbox on node", + zap.Error(utils.UnwrapGRPCError(err)), + zap.String("node_id", node.Info.ID), + zap.String("sandbox_id", sandboxID), + zap.Int("attempt", attempt), + ) // The node is not available, try again with another node node.createFails.Add(1) @@ -223,8 +226,7 @@ func (o *Orchestrator) CreateSandbox( cacheErr := o.instanceCache.Add(childCtx, instanceInfo, true) if cacheErr != nil { - errMsg := fmt.Errorf("error when adding instance to cache: %w", cacheErr) - telemetry.ReportError(ctx, errMsg) + telemetry.ReportError(ctx, "error when adding instance to cache", cacheErr) deleted := o.DeleteInstance(childCtx, sbx.SandboxID, false) if !deleted { @@ -234,7 +236,7 @@ func (o *Orchestrator) CreateSandbox( return nil, &api.APIError{ Code: http.StatusInternalServerError, ClientMsg: "Failed to create sandbox", - Err: errMsg, + Err: fmt.Errorf("error when adding instance to cache: %w", cacheErr), } } diff --git 
a/packages/api/internal/orchestrator/node.go b/packages/api/internal/orchestrator/node.go index 2ea72a4bd..7c1923e59 100644 --- a/packages/api/internal/orchestrator/node.go +++ b/packages/api/internal/orchestrator/node.go @@ -3,7 +3,6 @@ package orchestrator import ( "context" "fmt" - "os" "sync" "sync/atomic" "time" @@ -120,7 +119,7 @@ func (o *Orchestrator) GetNodes() []*api.Node { for _, sbx := range o.instanceCache.Items() { n, ok := nodes[sbx.Instance.ClientID] if !ok { - fmt.Fprintf(os.Stderr, "node [%s] for sandbox [%s] wasn't found \n", sbx.Instance.ClientID, sbx.Instance.SandboxID) + zap.L().Error("node for sandbox wasn't found", zap.String("client_id", sbx.Instance.ClientID), zap.String("sandbox_id", sbx.Instance.SandboxID)) continue } diff --git a/packages/api/internal/orchestrator/pause_instance.go b/packages/api/internal/orchestrator/pause_instance.go index a720267ac..f59ed0f4a 100644 --- a/packages/api/internal/orchestrator/pause_instance.go +++ b/packages/api/internal/orchestrator/pause_instance.go @@ -52,33 +52,29 @@ func (o *Orchestrator) PauseInstance( teamID, ) if err != nil { - errMsg := fmt.Errorf("error pausing sandbox: %w", err) + telemetry.ReportCriticalError(ctx, "error pausing sandbox", err) - telemetry.ReportCriticalError(ctx, errMsg) - - return errMsg + return err } err = snapshotInstance(ctx, o, sbx, *envBuild.EnvID, envBuild.ID.String()) if errors.Is(err, ErrPauseQueueExhausted{}) { - telemetry.ReportCriticalError(ctx, fmt.Errorf("pause queue exhausted %w", err)) + telemetry.ReportCriticalError(ctx, "pause queue exhausted", err) return ErrPauseQueueExhausted{} } if err != nil && !errors.Is(err, ErrPauseQueueExhausted{}) { - errMsg := fmt.Errorf("error pausing sandbox: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error pausing sandbox", err) - return errMsg + return fmt.Errorf("error pausing sandbox: %w", err) } err = o.dbClient.EnvBuildSetStatus(ctx, *envBuild.EnvID, envBuild.ID, 
envbuild.StatusSuccess) if err != nil { - errMsg := fmt.Errorf("error pausing sandbox: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error pausing sandbox", err) - return errMsg + return fmt.Errorf("error pausing sandbox: %w", err) } return nil diff --git a/packages/api/internal/team/apikeys.go b/packages/api/internal/team/apikeys.go index 1c8a83834..f0fcd6505 100644 --- a/packages/api/internal/team/apikeys.go +++ b/packages/api/internal/team/apikeys.go @@ -16,10 +16,9 @@ import ( func CreateAPIKey(ctx context.Context, db *db.DB, teamID uuid.UUID, userID uuid.UUID, name string) (*models.TeamAPIKey, error) { teamApiKey, err := keys.GenerateKey(keys.ApiKeyPrefix) if err != nil { - errMsg := fmt.Errorf("error when generating team API key: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when generating team API key", err) - return nil, errMsg + return nil, fmt.Errorf("error when generating team API key: %w", err) } apiKey, err := db.Client.TeamAPIKey. @@ -34,8 +33,9 @@ func CreateAPIKey(ctx context.Context, db *db.DB, teamID uuid.UUID, userID uuid. SetName(name). 
Save(ctx) if err != nil { - errMsg := fmt.Errorf("error when creating API key: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error when creating API key", err) + + return nil, fmt.Errorf("error when creating API key: %w", err) } return apiKey, nil diff --git a/packages/api/internal/utils/body.go b/packages/api/internal/utils/body.go index 5561a0966..adbd674f6 100644 --- a/packages/api/internal/utils/body.go +++ b/packages/api/internal/utils/body.go @@ -14,11 +14,9 @@ import ( func ParseBody[B any](ctx context.Context, c *gin.Context) (body B, err error) { err = c.Bind(&body) if err != nil { - bodyErr := fmt.Errorf("error when parsing request: %w", err) + telemetry.ReportCriticalError(ctx, "error when parsing request", err) - telemetry.ReportCriticalError(ctx, bodyErr) - - return body, bodyErr + return body, fmt.Errorf("error when parsing request: %w", err) } return body, nil diff --git a/packages/api/internal/utils/counter.go b/packages/api/internal/utils/counter.go index b29e0cdb5..ab58c2524 100644 --- a/packages/api/internal/utils/counter.go +++ b/packages/api/internal/utils/counter.go @@ -2,10 +2,11 @@ package utils import ( "context" - "log" "sync" "time" + "go.uber.org/zap" + "github.com/e2b-dev/infra/packages/shared/pkg/db" ) @@ -69,7 +70,7 @@ func (t *TemplateSpawnCounter) flushCounters(dbClient *db.DB) { for templateID, counter := range updates { err := dbClient.UpdateEnvLastUsed(context.Background(), int64(counter.count), counter.lastUpdate, templateID) if err != nil { - log.Println("Error updating template spawn count:", err) + zap.L().Error("error updating template spawn count", zap.Error(err)) } } } diff --git a/packages/orchestrator/internal/server/sandboxes.go b/packages/orchestrator/internal/server/sandboxes.go index e72738061..d8b2cc3e2 100644 --- a/packages/orchestrator/internal/server/sandboxes.go +++ b/packages/orchestrator/internal/server/sandboxes.go @@ -66,9 +66,10 @@ func (s *server) 
Create(ctxConn context.Context, req *orchestrator.SandboxCreate zap.L().Error("failed to create sandbox, cleaning up", zap.Error(err)) cleanupErr := cleanup.Run(ctx) - errMsg := fmt.Errorf("failed to cleanup sandbox: %w", errors.Join(err, context.Cause(ctx), cleanupErr)) - telemetry.ReportCriticalError(ctx, errMsg) + errs := errors.Join(err, context.Cause(ctx), cleanupErr) + telemetry.ReportCriticalError(ctx, "failed to create sandbox", errs) + errMsg := fmt.Errorf("failed to cleanup sandbox: %w", errs) return nil, status.New(codes.Internal, errMsg.Error()).Err() } @@ -125,7 +126,7 @@ func (s *server) Update(ctx context.Context, req *orchestrator.SandboxUpdateRequ item, ok := s.sandboxes.Get(req.SandboxId) if !ok { errMsg := fmt.Errorf("sandbox not found") - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "sandbox not found", errMsg) return nil, status.New(codes.NotFound, errMsg.Error()).Err() } @@ -180,7 +181,7 @@ func (s *server) Delete(ctxConn context.Context, in *orchestrator.SandboxDeleteR sbx, ok := s.sandboxes.Get(in.SandboxId) if !ok { errMsg := fmt.Errorf("sandbox '%s' not found", in.SandboxId) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "sandbox not found", errMsg, attribute.String("sandbox_id", in.SandboxId)) return nil, status.New(codes.NotFound, errMsg.Error()).Err() } @@ -215,7 +216,7 @@ func (s *server) Pause(ctx context.Context, in *orchestrator.SandboxPauseRequest err := pauseQueue.Acquire(ctx, 1) if err != nil { - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "pause queue exhausted", err) return nil, status.New(codes.ResourceExhausted, err.Error()).Err() } @@ -233,7 +234,7 @@ func (s *server) Pause(ctx context.Context, in *orchestrator.SandboxPauseRequest s.pauseMu.Unlock() errMsg := fmt.Errorf("sandbox not found") - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "sandbox not found", errMsg, 
attribute.String("sandbox_id", in.SandboxId)) return nil, status.New(codes.NotFound, errMsg.Error()).Err() } @@ -250,9 +251,9 @@ func (s *server) Pause(ctx context.Context, in *orchestrator.SandboxPauseRequest sbx.Config.HugePages, ).NewTemplateCacheFiles() if err != nil { - errMsg := fmt.Errorf("error creating template files: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error creating template files", err) + errMsg := fmt.Errorf("error creating template files: %w", err) return nil, status.New(codes.Internal, errMsg.Error()).Err() } @@ -272,17 +273,17 @@ func (s *server) Pause(ctx context.Context, in *orchestrator.SandboxPauseRequest err = os.MkdirAll(snapshotTemplateFiles.CacheDir(), 0o755) if err != nil { - errMsg := fmt.Errorf("error creating sandbox cache dir '%s': %w", snapshotTemplateFiles.CacheDir(), err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error creating sandbox cache dir", err, attribute.String("cache_dir", snapshotTemplateFiles.CacheDir())) + errMsg := fmt.Errorf("error creating sandbox cache dir '%s': %w", snapshotTemplateFiles.CacheDir(), err) return nil, status.New(codes.Internal, errMsg.Error()).Err() } snapshot, err := sbx.Snapshot(ctx, s.tracer, snapshotTemplateFiles, releaseOnce) if err != nil { - errMsg := fmt.Errorf("error snapshotting sandbox '%s': %w", in.SandboxId, err) - telemetry.ReportCriticalError(ctx, errMsg) + telemetry.ReportCriticalError(ctx, "error snapshotting sandbox", err, attribute.String("sandbox_id", in.SandboxId)) + errMsg := fmt.Errorf("error snapshotting sandbox '%s': %w", in.SandboxId, err) return nil, status.New(codes.Internal, errMsg.Error()).Err() } @@ -299,9 +300,9 @@ func (s *server) Pause(ctx context.Context, in *orchestrator.SandboxPauseRequest snapshot.RootfsDiff, ) if err != nil { - errMsg := fmt.Errorf("error adding snapshot to template cache: %w", err) - telemetry.ReportCriticalError(ctx, errMsg) + 
telemetry.ReportCriticalError(ctx, "error adding snapshot to template cache", err) + errMsg := fmt.Errorf("error adding snapshot to template cache: %w", err) return nil, status.New(codes.Internal, errMsg.Error()).Err() } diff --git a/packages/orchestrator/internal/template/build/build.go b/packages/orchestrator/internal/template/build/build.go index 8db0f5966..5b04a67aa 100644 --- a/packages/orchestrator/internal/template/build/build.go +++ b/packages/orchestrator/internal/template/build/build.go @@ -83,20 +83,18 @@ func (b *TemplateBuilder) Build(ctx context.Context, template *Env, envID string removeErr := template.Remove(removeCtx, b.tracer) if removeErr != nil { - b.logger.Error("Error while removing template files", zap.Error(removeErr)) - telemetry.ReportError(ctx, removeErr) + telemetry.ReportError(ctx, "error while removing template files", removeErr) } }() err = template.Build(ctx, b.tracer, postProcessor, b.dockerClient, b.legacyDockerClient) if err != nil { postProcessor.WriteMsg(fmt.Sprintf("Error building environment: %v", err)) - telemetry.ReportCriticalError(ctx, err) + telemetry.ReportCriticalError(ctx, "error building environment", err) buildStateErr := b.buildCache.SetFailed(envID, buildID) if buildStateErr != nil { - b.logger.Error("Error while setting build state to failed", zap.Error(buildStateErr)) - telemetry.ReportError(ctx, buildStateErr) + telemetry.ReportError(ctx, "error while setting build state to failed", buildStateErr) } return err @@ -110,8 +108,7 @@ func (b *TemplateBuilder) Build(ctx context.Context, template *Env, envID string removeErr := b.templateStorage.Remove(removeCtx, buildID) if removeErr != nil { - b.logger.Error("Error while removing build files", zap.Error(removeErr)) - telemetry.ReportError(ctx, removeErr) + telemetry.ReportError(ctx, "error while removing build files", removeErr) } } }() @@ -259,12 +256,11 @@ func (b *TemplateBuilder) Build(ctx context.Context, template *Env, envID string out, err := cmd.Output() if 
err != nil { postProcessor.WriteMsg(fmt.Sprintf("Error while getting envd version: %v", err)) - telemetry.ReportError(ctx, err) + telemetry.ReportError(ctx, "error while getting envd version", err) buildStateErr := b.buildCache.SetFailed(envID, buildID) if buildStateErr != nil { - b.logger.Error("Error while setting build state to failed", zap.Error(buildStateErr)) - telemetry.ReportError(ctx, buildStateErr) + telemetry.ReportError(ctx, "error while setting build state to failed", buildStateErr) } return err @@ -278,14 +274,12 @@ func (b *TemplateBuilder) Build(ctx context.Context, template *Env, envID string uploadErr := <-upload if uploadErr != nil { - errMsg := fmt.Sprintf("Error while uploading build files: %v", uploadErr) - postProcessor.WriteMsg(errMsg) - telemetry.ReportError(ctx, uploadErr) + postProcessor.WriteMsg(fmt.Sprintf("Error while uploading build files: %v", uploadErr)) + telemetry.ReportError(ctx, "error while uploading build files", uploadErr) buildStateErr := b.buildCache.SetFailed(envID, buildID) if buildStateErr != nil { - b.logger.Error("Error while setting build state to failed", zap.Error(buildStateErr)) - telemetry.ReportError(ctx, buildStateErr) + telemetry.ReportError(ctx, "error while setting build state to failed", buildStateErr) } return uploadErr @@ -294,8 +288,8 @@ func (b *TemplateBuilder) Build(ctx context.Context, template *Env, envID string buildMetadata := &template_manager.TemplateBuildMetadata{RootfsSizeKey: int32(template.RootfsSizeMB()), EnvdVersionKey: strings.TrimSpace(string(out))} err = b.buildCache.SetSucceeded(envID, buildID, buildMetadata) if err != nil { - b.logger.Error("Error while setting build state to succeeded", zap.Error(err)) - telemetry.ReportError(ctx, err) + telemetry.ReportError(ctx, "error while setting build state to succeeded", err) + return err } diff --git a/packages/orchestrator/internal/template/build/rootfs.go b/packages/orchestrator/internal/template/build/rootfs.go index 21eaa918e..64feef863 
100644 --- a/packages/orchestrator/internal/template/build/rootfs.go +++ b/packages/orchestrator/internal/template/build/rootfs.go @@ -6,7 +6,6 @@ import ( "context" "encoding/base64" "encoding/json" - "errors" "fmt" "io" "math" @@ -121,26 +120,23 @@ func (r *Rootfs) pullDockerImage(ctx context.Context, tracer trace.Tracer) error Platform: "linux/amd64", }) if err != nil { - errMsg := fmt.Errorf("error pulling image: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error pulling image", err) - return errMsg + return fmt.Errorf("error pulling image: %w", err) } _, err = io.Copy(os.Stdout, logs) if err != nil { - errMsg := fmt.Errorf("error copying logs: %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "error copying logs", err) - return errMsg + return fmt.Errorf("error copying logs: %w", err) } err = logs.Close() if err != nil { - errMsg := fmt.Errorf("error closing logs: %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "error closing logs", err) - return errMsg + return fmt.Errorf("error closing logs: %w", err) } telemetry.ReportEvent(childCtx, "pulled image") @@ -157,8 +153,7 @@ func (r *Rootfs) cleanupDockerImage(ctx context.Context, tracer trace.Tracer) { PruneChildren: false, }) if err != nil { - errMsg := fmt.Errorf("error removing image: %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "error removing image", err) } else { telemetry.ReportEvent(childCtx, "removed image") } @@ -188,10 +183,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post MemoryLimit: int(math.Min(float64(r.env.MemoryMB)/2, 512)), }) if err != nil { - errMsg := fmt.Errorf("error executing provision script: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error executing provision script", err) - return errMsg + return fmt.Errorf("error 
executing provision script: %w", err) } telemetry.ReportEvent(childCtx, "executed provision script env") @@ -222,10 +216,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post }, }, nil, &v1.Platform{}, "") if err != nil { - errMsg := fmt.Errorf("error creating container: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating container", err) - return errMsg + return fmt.Errorf("error creating container: %w", err) } telemetry.ReportEvent(childCtx, "created container") @@ -245,8 +238,7 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post Context: cleanupContext, }) if removeErr != nil { - errMsg := fmt.Errorf("error removing container: %w", removeErr) - telemetry.ReportError(cleanupContext, errMsg) + telemetry.ReportError(cleanupContext, "error removing container", removeErr) } else { telemetry.ReportEvent(cleanupContext, "removed container") } @@ -259,24 +251,21 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post All: true, }) if pruneErr != nil { - errMsg := fmt.Errorf("error pruning build cache: %w", pruneErr) - telemetry.ReportError(cleanupContext, errMsg) + telemetry.ReportError(cleanupContext, "error pruning build cache", pruneErr) } else { telemetry.ReportEvent(cleanupContext, "pruned build cache") } _, pruneErr = r.client.ImagesPrune(cleanupContext, filters.NewArgs(cacheTimeoutArg)) if pruneErr != nil { - errMsg := fmt.Errorf("error pruning images: %w", pruneErr) - telemetry.ReportError(cleanupContext, errMsg) + telemetry.ReportError(cleanupContext, "error pruning images", pruneErr) } else { telemetry.ReportEvent(cleanupContext, "pruned images") } _, pruneErr = r.client.ContainersPrune(cleanupContext, filters.NewArgs(cacheTimeoutArg)) if pruneErr != nil { - errMsg := fmt.Errorf("error pruning containers: %w", pruneErr) - telemetry.ReportError(cleanupContext, errMsg) + 
telemetry.ReportError(cleanupContext, "error pruning containers", pruneErr) } else { telemetry.ReportEvent(cleanupContext, "pruned containers") } @@ -301,8 +290,7 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post defer func() { closeErr := pw.CloseWithError(errMsg) if closeErr != nil { - errMsg := fmt.Errorf("error closing pipe: %w", closeErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error closing pipe", closeErr) } else { telemetry.ReportEvent(childCtx, "closed pipe") } @@ -312,8 +300,7 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post defer func() { err = tw.Close() if err != nil { - errMsg = fmt.Errorf("error closing tar writer: %w", errors.Join(err, errMsg)) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error closing tar writer", err) } else { telemetry.ReportEvent(childCtx, "closed tar writer") } @@ -322,8 +309,7 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post for _, file := range filesToTar { addErr := addFileToTarWriter(tw, file) if addErr != nil { - errMsg = fmt.Errorf("error adding envd to tar writer: %w", addErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error adding envd to tar writer", addErr) break } else { @@ -340,10 +326,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post NoOverwriteDirNonDir: false, }) if err != nil { - errMsg := fmt.Errorf("error copying envd to container: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error copying envd to container", err) - return errMsg + return fmt.Errorf("error copying envd to container: %w", err) } telemetry.ReportEvent(childCtx, "copied envd to container") @@ -351,10 +336,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post 
postProcessor.WriteMsg("Provisioning template") err = r.client.ContainerStart(childCtx, cont.ID, container.StartOptions{}) if err != nil { - errMsg := fmt.Errorf("error starting container: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error starting container", err) - return errMsg + return fmt.Errorf("error starting container: %w", err) } telemetry.ReportEvent(childCtx, "started container") @@ -385,8 +369,7 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post Timestamps: false, }) if logsErr != nil { - errMsg := fmt.Errorf("error getting container logs: %w", logsErr) - telemetry.ReportError(anonymousChildCtx, errMsg) + telemetry.ReportError(anonymousChildCtx, "error getting container logs", logsErr) } else { telemetry.ReportEvent(anonymousChildCtx, "setup container logs") } @@ -395,23 +378,20 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post wait, errWait := r.client.ContainerWait(childCtx, cont.ID, container.WaitConditionNotRunning) select { case <-childCtx.Done(): - errMsg := fmt.Errorf("error waiting for container: %w", childCtx.Err()) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error waiting for container", childCtx.Err()) - return errMsg + return fmt.Errorf("error waiting for container: %w", childCtx.Err()) case waitErr := <-errWait: if waitErr != nil { - errMsg := fmt.Errorf("error waiting for container: %w", waitErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error waiting for container", waitErr) - return errMsg + return fmt.Errorf("error waiting for container: %w", waitErr) } case response := <-wait: if response.Error != nil { - errMsg := fmt.Errorf("error waiting for container - code %d: %s", response.StatusCode, response.Error.Message) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error 
waiting for container", fmt.Errorf("%s", response.Error.Message), attribute.Int("code", int(response.StatusCode))) - return errMsg + return fmt.Errorf("error waiting for container - code %d: %s", response.StatusCode, response.Error.Message) } } @@ -419,41 +399,39 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post inspection, err := r.client.ContainerInspect(ctx, cont.ID) if err != nil { - errMsg := fmt.Errorf("error inspecting container: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error inspecting container", err) - return errMsg + return fmt.Errorf("error inspecting container: %w", err) } telemetry.ReportEvent(childCtx, "inspected container") if inspection.State.Running { errMsg := fmt.Errorf("container is still running") - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "container is still running", errMsg) return errMsg } if inspection.State.ExitCode != 0 { - errMsg := fmt.Errorf("container exited with status %d: %s", inspection.State.ExitCode, inspection.State.Error) telemetry.ReportCriticalError( childCtx, - errMsg, + "container exited with status", + fmt.Errorf("%s", inspection.State.Error), attribute.Int("exit_code", inspection.State.ExitCode), attribute.String("error", inspection.State.Error), attribute.Bool("oom", inspection.State.OOMKilled), ) - return errMsg + return fmt.Errorf("container exited with status %d: %s", inspection.State.ExitCode, inspection.State.Error) } postProcessor.WriteMsg("Extracting file system") rootfsFile, err := os.Create(r.env.BuildRootfsPath()) if err != nil { - errMsg := fmt.Errorf("error creating rootfs file: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating rootfs file", err) - return errMsg + return fmt.Errorf("error creating rootfs file: %w", err) } telemetry.ReportEvent(childCtx, "created rootfs file") @@ -461,8 +439,7 @@ func (r *Rootfs) 
createRootfsFile(ctx context.Context, tracer trace.Tracer, post defer func() { rootfsErr := rootfsFile.Close() if rootfsErr != nil { - errMsg := fmt.Errorf("error closing rootfs file: %w", rootfsErr) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error closing rootfs file", rootfsErr) } else { telemetry.ReportEvent(childCtx, "closed rootfs file") } @@ -477,16 +454,14 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post OutputStream: pw, }) if downloadErr != nil { - errMsg := fmt.Errorf("error downloading from container: %w", downloadErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error downloading from container", downloadErr) } else { telemetry.ReportEvent(childCtx, "downloaded from container") } closeErr := pw.Close() if closeErr != nil { - errMsg := fmt.Errorf("error closing pipe: %w", closeErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error closing pipe", closeErr) } else { telemetry.ReportEvent(childCtx, "closed pipe") } @@ -502,10 +477,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post r.env.BuildLogsWriter.Write([]byte(fmt.Sprintf("Build failed - exceeded maximum size %v MB.\n", maxRootfsSize>>ToMBShift))) } - errMsg := fmt.Errorf("error converting tar to ext4: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error converting tar to ext4", err) - return errMsg + return fmt.Errorf("error converting tar to ext4: %w", err) } postProcessor.WriteMsg("Filesystem cleanup") @@ -524,20 +498,18 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post err = cmd.Run() if err != nil { - errMsg := fmt.Errorf("error making rootfs file writable: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error making rootfs file writable", err) - 
return errMsg + return fmt.Errorf("error making rootfs file writable: %w", err) } telemetry.ReportEvent(childCtx, "made rootfs file writable") rootfsStats, err := rootfsFile.Stat() if err != nil { - errMsg := fmt.Errorf("error statting rootfs file: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error statting rootfs file", err) - return errMsg + return fmt.Errorf("error statting rootfs file: %w", err) } telemetry.ReportEvent(childCtx, "statted rootfs file") @@ -549,10 +521,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post err = rootfsFile.Truncate(rootfsSize) if err != nil { - errMsg := fmt.Errorf("error truncating rootfs file: %w to size of build + defaultDiskSizeMB", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error truncating rootfs file", err) - return errMsg + return fmt.Errorf("error truncating rootfs file: %w", err) } telemetry.ReportEvent(childCtx, "truncated rootfs file to size of build + defaultDiskSizeMB") @@ -570,10 +541,9 @@ func (r *Rootfs) createRootfsFile(ctx context.Context, tracer trace.Tracer, post err = cmd.Run() if err != nil { - errMsg := fmt.Errorf("error resizing rootfs file: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error resizing rootfs file", err) - return errMsg + return fmt.Errorf("error resizing rootfs file: %w", err) } telemetry.ReportEvent(childCtx, "resized rootfs file") diff --git a/packages/orchestrator/internal/template/build/snapshot_linux.go b/packages/orchestrator/internal/template/build/snapshot_linux.go index 6b132bb1e..dc6823e7f 100644 --- a/packages/orchestrator/internal/template/build/snapshot_linux.go +++ b/packages/orchestrator/internal/template/build/snapshot_linux.go @@ -200,24 +200,21 @@ func (s *Snapshot) startFCProcess( stdoutPipe, err := s.fc.StdoutPipe() if err != nil { - errMsg := fmt.Errorf("error creating 
fc stdout pipe: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating fc stdout pipe", err) - return errMsg + return fmt.Errorf("error creating fc stdout pipe: %w", err) } stderrPipe, err := s.fc.StderrPipe() if err != nil { - errMsg := fmt.Errorf("error creating fc stderr pipe: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating fc stderr pipe", err) closeErr := stdoutPipe.Close() if closeErr != nil { - closeErrMsg := fmt.Errorf("error closing fc stdout pipe: %w", closeErr) - telemetry.ReportError(childCtx, closeErrMsg) + telemetry.ReportError(childCtx, "error closing fc stdout pipe", closeErr) } - return errMsg + return fmt.Errorf("error creating fc stderr pipe: %w", err) } var outputWaitGroup sync.WaitGroup @@ -248,10 +245,9 @@ func (s *Snapshot) startFCProcess( err = s.fc.Start() if err != nil { - errMsg := fmt.Errorf("error starting fc process: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error starting fc process", err) - return errMsg + return fmt.Errorf("error starting fc process: %w", err) } telemetry.ReportEvent(childCtx, "started fc process") @@ -264,8 +260,7 @@ func (s *Snapshot) startFCProcess( waitErr := s.fc.Wait() if err != nil { - errMsg := fmt.Errorf("error waiting for fc process: %w", waitErr) - telemetry.ReportError(anonymousChildCtx, errMsg) + telemetry.ReportError(anonymousChildCtx, "error waiting for fc process", waitErr) } else { telemetry.ReportEvent(anonymousChildCtx, "fc process exited") } @@ -303,7 +298,7 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err := s.client.Operations.PutGuestBootSource(&bootSourceConfig) if err != nil { errMsg := fmt.Errorf("error setting fc boot source config: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error setting fc boot 
source config", errMsg) return errMsg } @@ -329,10 +324,9 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err = s.client.Operations.PutGuestDriveByID(&driversConfig) if err != nil { - errMsg := fmt.Errorf("error setting fc drivers config: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error setting fc drivers config", err) - return errMsg + return fmt.Errorf("error setting fc drivers config: %w", err) } telemetry.ReportEvent(childCtx, "set fc drivers config") @@ -351,10 +345,9 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err = s.client.Operations.PutGuestNetworkInterfaceByID(&networkConfig) if err != nil { - errMsg := fmt.Errorf("error setting fc network config: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error setting fc network config", err) - return errMsg + return fmt.Errorf("error setting fc network config: %w", err) } telemetry.ReportEvent(childCtx, "set fc network config") @@ -389,10 +382,9 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err = s.client.Operations.PutMachineConfiguration(&machineConfigParams) if err != nil { - errMsg := fmt.Errorf("error setting fc machine config: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error setting fc machine config", err) - return errMsg + return fmt.Errorf("error setting fc machine config: %w", err) } telemetry.ReportEvent(childCtx, "set fc machine config") @@ -408,10 +400,9 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err = s.client.Operations.PutMmdsConfig(&mmdsConfig) if err != nil { - errMsg := fmt.Errorf("error setting fc mmds config: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error setting fc mmds config", err) - return 
errMsg + return fmt.Errorf("error setting fc mmds config: %w", err) } telemetry.ReportEvent(childCtx, "set fc mmds config") @@ -429,10 +420,9 @@ func (s *Snapshot) configureFC(ctx context.Context, tracer trace.Tracer) error { _, err = s.client.Operations.CreateSyncAction(&startActionParams) if err != nil { - errMsg := fmt.Errorf("error starting fc: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error starting fc", err) - return errMsg + return fmt.Errorf("error starting fc: %w", err) } telemetry.ReportEvent(childCtx, "started fc") @@ -454,10 +444,9 @@ func (s *Snapshot) pauseFC(ctx context.Context, tracer trace.Tracer) error { _, err := s.client.Operations.PatchVM(&pauseConfig) if err != nil { - errMsg := fmt.Errorf("error pausing vm: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error pausing vm", err) - return errMsg + return fmt.Errorf("error pausing vm: %w", err) } telemetry.ReportEvent(childCtx, "paused fc") @@ -482,10 +471,9 @@ func (s *Snapshot) snapshotFC(ctx context.Context, tracer trace.Tracer) error { _, err := s.client.Operations.CreateSnapshot(&snapshotConfig) if err != nil { - errMsg := fmt.Errorf("error creating vm snapshot: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating vm snapshot", err) - return errMsg + return fmt.Errorf("error creating vm snapshot: %w", err) } telemetry.ReportEvent(childCtx, "created vm snapshot") @@ -500,8 +488,7 @@ func (s *Snapshot) cleanupFC(ctx context.Context, tracer trace.Tracer) { if s.fc != nil { err := s.fc.Cancel() if err != nil { - errMsg := fmt.Errorf("error killing fc process: %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "error killing fc process", err) } else { telemetry.ReportEvent(childCtx, "killed fc process") } @@ -509,8 +496,7 @@ func (s *Snapshot) cleanupFC(ctx context.Context, tracer 
trace.Tracer) { err := os.RemoveAll(s.socketPath) if err != nil { - errMsg := fmt.Errorf("error removing fc socket %w", err) - telemetry.ReportError(childCtx, errMsg) + telemetry.ReportError(childCtx, "error removing fc socket", err) } else { telemetry.ReportEvent(childCtx, "removed fc socket") } diff --git a/packages/orchestrator/internal/template/build/template.go b/packages/orchestrator/internal/template/build/template.go index 09d8ceb57..7a5a30e87 100644 --- a/packages/orchestrator/internal/template/build/template.go +++ b/packages/orchestrator/internal/template/build/template.go @@ -54,36 +54,32 @@ func (e *Env) Build(ctx context.Context, tracer trace.Tracer, postProcessor *wri err := os.MkdirAll(e.BuildDir(), 0o777) if err != nil { - errMsg := fmt.Errorf("error initializing directories for building env '%s' during build '%s': %w", e.TemplateId, e.BuildId, err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error initializing directories for building env", err) - return errMsg + return fmt.Errorf("error initializing directories for building env: %w", err) } rootfs, err := NewRootfs(childCtx, tracer, postProcessor, e, docker, legacyDocker) if err != nil { - errMsg := fmt.Errorf("error creating rootfs for env '%s' during build '%s': %w", e.TemplateId, e.BuildId, err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error creating rootfs for env", err) - return errMsg + return fmt.Errorf("error creating rootfs for env: %w", err) } network, err := NewFCNetwork(childCtx, tracer, postProcessor, e) if err != nil { - errMsg := fmt.Errorf("error network setup for FC while building env '%s' during build '%s': %w", e.TemplateId, e.BuildId, err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error network setup for FC while building env", err) - return errMsg + return fmt.Errorf("error network setup for FC while building env: %w", err) } 
defer network.Cleanup(childCtx, tracer) _, err = NewSnapshot(childCtx, tracer, postProcessor, e, network, rootfs) if err != nil { - errMsg := fmt.Errorf("error snapshot for env '%s' during build '%s': %w", e.TemplateId, e.BuildId, err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error snapshot for env", err) - return errMsg + return fmt.Errorf("error snapshot for env: %w", err) } return nil @@ -95,10 +91,9 @@ func (e *Env) Remove(ctx context.Context, tracer trace.Tracer) error { err := os.RemoveAll(e.BuildDir()) if err != nil { - errMsg := fmt.Errorf("error removing build dir: %w", err) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error removing build dir", err) - return errMsg + return fmt.Errorf("error removing build dir: %w", err) } telemetry.ReportEvent(childCtx, "removed build dir") diff --git a/packages/orchestrator/internal/template/template/main.go b/packages/orchestrator/internal/template/template/main.go index d80a97ec8..a09eb1b10 100644 --- a/packages/orchestrator/internal/template/template/main.go +++ b/packages/orchestrator/internal/template/template/main.go @@ -41,16 +41,14 @@ func Delete( log.Printf("template image not found in registry, skipping deletion: %v", artifactRegistryDeleteErr) telemetry.ReportEvent(childCtx, fmt.Sprintf("template image not found in registry, skipping deletion: %v", artifactRegistryDeleteErr)) } else { - errMsg := fmt.Errorf("error when deleting template image from registry: %w", artifactRegistryDeleteErr) - telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error when deleting template image from registry", artifactRegistryDeleteErr) } } else { telemetry.ReportEvent(childCtx, "started deleting template image from registry") waitErr := op.Wait(childCtx) if waitErr != nil { - errMsg := fmt.Errorf("error when waiting for template image deleting from registry: %w", waitErr) - 
telemetry.ReportCriticalError(childCtx, errMsg) + telemetry.ReportCriticalError(childCtx, "error when waiting for template image deleting from registry", waitErr) } else { telemetry.ReportEvent(childCtx, "deleted template image from registry") } diff --git a/packages/shared/pkg/models/accesstoken.go b/packages/shared/pkg/models/accesstoken.go index 7c9bf4238..02e964e4a 100644 --- a/packages/shared/pkg/models/accesstoken.go +++ b/packages/shared/pkg/models/accesstoken.go @@ -20,7 +20,7 @@ type AccessToken struct { // ID of the ent. ID uuid.UUID `json:"id,omitempty"` // AccessToken holds the value of the "access_token" field. - AccessToken string `json:"access_token,omitempty"` + AccessToken string `json:"-"` // AccessTokenHash holds the value of the "access_token_hash" field. AccessTokenHash string `json:"-"` // AccessTokenMask holds the value of the "access_token_mask" field. @@ -168,8 +168,7 @@ func (at *AccessToken) String() string { var builder strings.Builder builder.WriteString("AccessToken(") builder.WriteString(fmt.Sprintf("id=%v, ", at.ID)) - builder.WriteString("access_token=") - builder.WriteString(at.AccessToken) + builder.WriteString("access_token=") builder.WriteString(", ") builder.WriteString("access_token_hash=") builder.WriteString(", ") diff --git a/packages/shared/pkg/schema/access_token.go b/packages/shared/pkg/schema/access_token.go index bfdafe18a..078377aee 100644 --- a/packages/shared/pkg/schema/access_token.go +++ b/packages/shared/pkg/schema/access_token.go @@ -16,7 +16,7 @@ type AccessToken struct { func (AccessToken) Fields() []ent.Field { return []ent.Field{ field.UUID("id", uuid.UUID{}).Immutable().Unique().Annotations(entsql.Default("gen_random_uuid()")), - field.String("access_token").Unique().Immutable().SchemaType(map[string]string{dialect.Postgres: "text"}), + field.String("access_token").Unique().Immutable().Sensitive().SchemaType(map[string]string{dialect.Postgres: "text"}), 
field.String("access_token_hash").Immutable().Unique().Sensitive().SchemaType(map[string]string{dialect.Postgres: "text"}), field.String("access_token_mask").Immutable().SchemaType(map[string]string{dialect.Postgres: "text"}), field.String("name").SchemaType(map[string]string{dialect.Postgres: "text"}).Default("Unnamed Access Token"), diff --git a/packages/shared/pkg/telemetry/tracing.go b/packages/shared/pkg/telemetry/tracing.go index df782ee72..aa7748ea8 100644 --- a/packages/shared/pkg/telemetry/tracing.go +++ b/packages/shared/pkg/telemetry/tracing.go @@ -8,6 +8,7 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) var OTELTracingPrint = os.Getenv("OTEL_TRACING_PRINT") != "false" @@ -72,47 +73,29 @@ func ReportEvent(ctx context.Context, name string, attrs ...attribute.KeyValue) ) } -func ReportCriticalError(ctx context.Context, err error, attrs ...attribute.KeyValue) { +func ReportCriticalError(ctx context.Context, message string, err error, attrs ...attribute.KeyValue) { span := trace.SpanFromContext(ctx) - if OTELTracingPrint { - var msg string + debugID := getDebugID(ctx) + zap.L().Error(message, zap.Stringp("debug_id", debugID), zap.Error(err), zap.Any("attrs", attrs)) - if len(attrs) == 0 { - msg = fmt.Sprintf("Critical error: %v\n", err) - } else { - msg = fmt.Sprintf("Critical error: %v - %#v\n", err, attrs) - } - - debugID := getDebugID(ctx) - fmt.Fprint(os.Stderr, debugFormat(debugID, msg)) - } + errorAttrs := append(attrs, attribute.String("error.message", message)) span.RecordError(err, trace.WithStackTrace(true), trace.WithAttributes( - attrs..., + errorAttrs..., ), ) - span.SetStatus(codes.Error, "critical error") + span.SetStatus(codes.Error, message) } -func ReportError(ctx context.Context, err error, attrs ...attribute.KeyValue) { +func ReportError(ctx context.Context, message string, err error, attrs ...attribute.KeyValue) { span := trace.SpanFromContext(ctx) - 
if OTELTracingPrint { - var msg string - - if len(attrs) == 0 { - msg = fmt.Sprintf("Error: %v\n", err) - } else { - msg = fmt.Sprintf("Error: %v - %#v\n", err, attrs) - } - - debugID := getDebugID(ctx) - fmt.Fprint(os.Stderr, debugFormat(debugID, msg)) - } + debugID := getDebugID(ctx) + zap.L().Warn(message, zap.Stringp("debug_id", debugID), zap.Error(err), zap.Any("attrs", attrs)) span.RecordError(err, trace.WithStackTrace(true), @@ -127,6 +110,7 @@ func GetContextFromRemote(ctx context.Context, tracer trace.Tracer, name, spanID if traceIDErr != nil { ReportError( ctx, + traceIDErr.Error(), traceIDErr, attribute.String("trace.id", traceID), attribute.Int("trace.id.length", len(traceID)), @@ -137,6 +121,7 @@ func GetContextFromRemote(ctx context.Context, tracer trace.Tracer, name, spanID if spanIDErr != nil { ReportError( ctx, + spanIDErr.Error(), spanIDErr, attribute.String("span.id", spanID), attribute.Int("span.id.length", len(spanID)),