diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 0f70a1a3d040e..9b701cec1fafc 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1002,6 +1002,15 @@ LEVEL = Info ;; Allow fork repositories without maximum number limit ;ALLOW_FORK_WITHOUT_MAXIMUM_LIMIT = true +; +;; Enable applying a global size limit defined by REPO_SIZE_LIMIT. Each repository can have a value that overrides the global limit +;; "false" means no limit will be enforced, even if specified on a repository +;ENABLE_SIZE_LIMIT = false +; +;; Specify a global repository size limit in bytes to apply for each repository. 0 - No limit +;; If repository has it's own limit set in UI it will override the global setting +;; Standard units of measurements for size can be used like B, KB, KiB, ... , EB, EiB, ... +;REPO_SIZE_LIMIT = 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/administration/repo-size-limit.en-us.md b/docs/content/administration/repo-size-limit.en-us.md new file mode 100644 index 0000000000000..7e355312941db --- /dev/null +++ b/docs/content/administration/repo-size-limit.en-us.md @@ -0,0 +1,52 @@ +--- +date: "2023-05-24T13:00:00+00:00" +title: "Per repository size limit" +slug: "repo-size-limit" +weight: 12 +toc: false +draft: false +aliases: + - /en-us/repo-size-limit +menu: + sidebar: + parent: "administration" + name: "Per repository size limit" + weight: 12 + identifier: "repo-size-limit" +--- + +# Gitea per repository size limit setup + +To use Gitea's experimental built-in per repository size limit support, Administrator must update the `app.ini` file: + +```ini +;; Enable applying a global size limit defined by REPO_SIZE_LIMIT. 
Each repository can have a value that overrides the global limit +;; "false" means no limit will be enforced, even if specified on a repository +ENABLE_SIZE_LIMIT = true + +;; Specify a global repository size limit in bytes to apply for each repository. 0 - No limit +;; If repository has its own limit set in UI it will override the global setting +;; Standard units of measurement for size can be used like B, KB, KiB, ... , EB, EiB, ... +REPO_SIZE_LIMIT = 500 MB +``` +This setting is persistent. + +The size limitation is triggered when repository `disk size` + `new commit size` > `defined repository size limit` + +If size limitation is triggered the feature would prevent commits that increase repository size on disk +of the Gitea server and allow those that decrease it + +# Gitea per repository size limit setup in UI + +1. For Gitea admin it is possible during runtime to enable/disable limit size feature, change the global size limit on the fly. +**This setting is not persistent across restarts** + +`Admin panel/Site settings` -> `Repository management` + +Persistence can be achieved if the limit is maintained by editing `app.ini` file + +2. The individually set per repository limit in `Settings` of the +repository would take precedence over global limit when the size limit +feature is enabled. 
Only admin can modify those limits + +**Note**: Size checking for large repositories is time consuming operation so time of push under size limit might increase up to a minute depending on your server hardware diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 13551423ce470..b6299723e371f 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -601,6 +601,8 @@ var migrations = []Migration{ NewMigration("Add metadata column for comment table", v1_23.AddCommentMetaDataColumn), // v304 -> v305 NewMigration("Add index for release sha1", v1_23.AddIndexForReleaseSha1), + // to modify later + NewMigration("Add size limit on repository", v1_22.AddSizeLimitOnRepo), } // GetCurrentDBVersion returns the current db version diff --git a/models/migrations/v1_22/v999.go b/models/migrations/v1_22/v999.go new file mode 100644 index 0000000000000..85d12b29815cd --- /dev/null +++ b/models/migrations/v1_22/v999.go @@ -0,0 +1,17 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package v1_22 //nolint + +import ( + "xorm.io/xorm" +) + +func AddSizeLimitOnRepo(x *xorm.Engine) error { + type Repository struct { + ID int64 `xorm:"pk autoincr"` + SizeLimit int64 `xorm:"NOT NULL DEFAULT 0"` + } + + return x.Sync2(new(Repository)) +} diff --git a/models/repo/repo.go b/models/repo/repo.go index 68f8e16a21d58..cfb7aeb982daf 100644 --- a/models/repo/repo.go +++ b/models/repo/repo.go @@ -175,7 +175,9 @@ type Repository struct { BaseRepo *Repository `xorm:"-"` IsTemplate bool `xorm:"INDEX NOT NULL DEFAULT false"` TemplateID int64 `xorm:"INDEX"` + SizeLimit int64 `xorm:"NOT NULL DEFAULT 0"` Size int64 `xorm:"NOT NULL DEFAULT 0"` + EnableSizeLimit bool `xorm:"NOT NULL DEFAULT true"` GitSize int64 `xorm:"NOT NULL DEFAULT 0"` LFSSize int64 `xorm:"NOT NULL DEFAULT 0"` CodeIndexerStatus *RepoIndexerStatus `xorm:"-"` @@ -584,6 +586,27 @@ func (repo *Repository) IsOwnedBy(userID int64) bool { return repo.OwnerID == userID } +// GetActualSizeLimit returns repository size limit in bytes +// or global repository limit setting if per repository size limit is not set +func (repo *Repository) GetActualSizeLimit() int64 { + sizeLimit := repo.SizeLimit + if setting.RepoSizeLimit > 0 && sizeLimit == 0 { + sizeLimit = setting.RepoSizeLimit + } + return sizeLimit +} + +// RepoSizeIsOversized return true if is over size limitation +func (repo *Repository) IsRepoSizeOversized(additionalSize int64) bool { + return setting.EnableSizeLimit && repo.GetActualSizeLimit() > 0 && repo.GitSize+additionalSize > repo.GetActualSizeLimit() +} + +// RepoSizeLimitEnabled return true if size limit checking is enabled and limit is non zero for this specific repository +// this is used to enable size checking during pre-receive hook +func (repo *Repository) IsRepoSizeLimitEnabled() bool { + return setting.EnableSizeLimit && repo.GetActualSizeLimit() > 0 +} + // CanCreateBranch returns true if repository meets the requirements for creating new branches. 
func (repo *Repository) CanCreateBranch() bool { return !repo.IsMirror diff --git a/modules/base/tool.go b/modules/base/tool.go index 9e43030f40019..393a1b6ded002 100644 --- a/modules/base/tool.go +++ b/modules/base/tool.go @@ -119,6 +119,13 @@ func FileSize(s int64) string { return humanize.IBytes(uint64(s)) } +// GetFileSize parses a human-readable size string such as "512 GiB" into a number of bytes. +func GetFileSize(s string) (int64, error) { + v, err := humanize.ParseBytes(s) + iv := int64(v) + return iv, err +} + // EllipsisString returns a truncated short string, // it appends '...' in the end of the length of string is too large. func EllipsisString(str string, length int) string { diff --git a/modules/base/tool_test.go b/modules/base/tool_test.go index 4af8b9bc4d528..f056a8e4f27f6 100644 --- a/modules/base/tool_test.go +++ b/modules/base/tool_test.go @@ -114,6 +114,13 @@ func TestFileSize(t *testing.T) { assert.Equal(t, "2.0 EiB", FileSize(size)) } +func TestGetFileSize(t *testing.T) { + var size int64 = 512 * 1024 * 1024 * 1024 + s, err := GetFileSize("512 GiB") + assert.Equal(t, size, s) + assert.NoError(t, err) +} + func TestEllipsisString(t *testing.T) { assert.Equal(t, "...", EllipsisString("foobar", 0)) assert.Equal(t, "...", EllipsisString("foobar", 1)) diff --git a/modules/git/repo.go b/modules/git/repo.go index 1c223018adddc..bf0c9f53e96eb 100644 --- a/modules/git/repo.go +++ b/modules/git/repo.go @@ -236,6 +236,74 @@ func Push(ctx context.Context, repoPath string, opts PushOptions) error { return nil } +// CountObject represents repository count objects report +type CountObject struct { + Count int64 + Size int64 + InPack int64 + Packs int64 + SizePack int64 + PrunePack int64 + Garbage int64 + SizeGarbage int64 +} + +const ( + statCount = "count: " + statSize = "size: " + statInpack = "in-pack: " + statPacks = "packs: " + statSizePack = "size-pack: " + statPrunePackage = "prune-packable: " + statGarbage = "garbage: " + statSizeGarbage = "size-garbage: " +) + +// CountObjects returns the results of git 
count-objects on the repoPath +func CountObjects(ctx context.Context, repoPath string) (*CountObject, error) { + return CountObjectsWithEnv(ctx, repoPath, nil) +} + +// CountObjectsWithEnv returns the results of git count-objects on the repoPath with custom env setup +func CountObjectsWithEnv(ctx context.Context, repoPath string, env []string) (*CountObject, error) { + cmd := NewCommand(ctx, "count-objects", "-v") + stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repoPath, Env: env}) + if err != nil { + return nil, err + } + + return parseSize(stdout), nil +} + +// parseSize parses the output from count-objects and return a CountObject +func parseSize(objects string) *CountObject { + repoSize := new(CountObject) + for _, line := range strings.Split(objects, "\n") { + switch { + case strings.HasPrefix(line, statCount): + repoSize.Count, _ = strconv.ParseInt(line[7:], 10, 64) + case strings.HasPrefix(line, statSize): + number, _ := strconv.ParseInt(line[6:], 10, 64) + repoSize.Size = number * 1024 + case strings.HasPrefix(line, statInpack): + repoSize.InPack, _ = strconv.ParseInt(line[9:], 10, 64) + case strings.HasPrefix(line, statPacks): + repoSize.Packs, _ = strconv.ParseInt(line[7:], 10, 64) + case strings.HasPrefix(line, statSizePack): + number, _ := strconv.ParseInt(line[11:], 10, 64) + repoSize.SizePack = number * 1024 + case strings.HasPrefix(line, statPrunePackage): + repoSize.PrunePack, _ = strconv.ParseInt(line[16:], 10, 64) + case strings.HasPrefix(line, statGarbage): + repoSize.Garbage, _ = strconv.ParseInt(line[9:], 10, 64) + case strings.HasPrefix(line, statSizeGarbage): + number, _ := strconv.ParseInt(line[14:], 10, 64) + repoSize.SizeGarbage = number * 1024 + } + } + return repoSize +} + // GetLatestCommitTime returns time for latest commit in repository (across all branches) func GetLatestCommitTime(ctx context.Context, repoPath string) (time.Time, error) { cmd := NewCommand(ctx, "for-each-ref", "--sort=-committerdate", BranchPrefix, "--count", 
"1", "--format=%(committerdate)") diff --git a/modules/setting/repository.go b/modules/setting/repository.go index 8656ebc7ecfd0..a35e03f947d75 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -10,6 +10,8 @@ import ( "strings" "code.gitea.io/gitea/modules/log" + + "github.com/dustin/go-humanize" ) // enumerates all the policy repository creating @@ -268,12 +270,35 @@ var ( } RepoRootPath string ScriptType = "bash" + + EnableSizeLimit = true + RepoSizeLimit int64 ) +func SaveGlobalRepositorySetting(enableSizeLimit bool, repoSizeLimit int64) error { + EnableSizeLimit = enableSizeLimit + RepoSizeLimit = repoSizeLimit + sec := CfgProvider.Section("repository") + if EnableSizeLimit { + sec.Key("ENABLE_SIZE_LIMIT").SetValue("true") + } else { + sec.Key("ENABLE_SIZE_LIMIT").SetValue("false") + } + + sec.Key("REPO_SIZE_LIMIT").SetValue(humanize.Bytes(uint64(RepoSizeLimit))) + return nil +} + func loadRepositoryFrom(rootCfg ConfigProvider) { var err error + // Determine and create root git repository path. 
sec := rootCfg.Section("repository") + EnableSizeLimit = sec.Key("ENABLE_SIZE_LIMIT").MustBool(false) + + v, _ := humanize.ParseBytes(sec.Key("REPO_SIZE_LIMIT").MustString("0")) + RepoSizeLimit = int64(v) + Repository.DisableHTTPGit = sec.Key("DISABLE_HTTP_GIT").MustBool() Repository.UseCompatSSHURI = sec.Key("USE_COMPAT_SSH_URI").MustBool() Repository.GoGetCloneURLProtocol = sec.Key("GO_GET_CLONE_URL_PROTOCOL").MustString("https") diff --git a/modules/structs/repo.go b/modules/structs/repo.go index 444967c3e7586..575737736be2d 100644 --- a/modules/structs/repo.go +++ b/modules/structs/repo.go @@ -148,6 +148,8 @@ type CreateRepoOption struct { // ObjectFormatName of the underlying git repository // enum: sha1,sha256 ObjectFormatName string `json:"object_format_name" binding:"MaxSize(6)"` + // SizeLimit of the repository + SizeLimit int64 `json:"size_limit"` } // EditRepoOption options when editing a repository's properties @@ -216,6 +218,8 @@ type EditRepoOption struct { DefaultAllowMaintainerEdit *bool `json:"default_allow_maintainer_edit,omitempty"` // set to `true` to archive this repository. Archived *bool `json:"archived,omitempty"` + // SizeLimit of the repository. + SizeLimit *int64 `json:"size_limit,omitempty"` // set to a string like `8h30m0s` to set the mirror interval time MirrorInterval *string `json:"mirror_interval,omitempty"` // enable prune - remove obsolete remote-tracking references when mirroring diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 7d8eb1a475399..7718fc1c033fb 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1003,6 +1003,7 @@ owner_helper = Some organizations may not show up in the dropdown due to a maxim repo_name = Repository Name repo_name_helper = Good repository names use short, memorable and unique keywords. repo_size = Repository Size +repo_size_limit = Repository Size Limit template = Template template_select = Select a template. 
template_helper = Make repository a template @@ -1132,6 +1133,8 @@ form.reach_limit_of_creation_1 = The owner has already reached the limit of %d r form.reach_limit_of_creation_n = The owner has already reached the limit of %d repositories. form.name_reserved = The repository name "%s" is reserved. form.name_pattern_not_allowed = The pattern "%s" is not allowed in a repository name. +form.repo_size_limit_negative = Repository size limitation cannot be negative. +form.repo_size_limit_only_by_admins = Only administrators can change the repository size limitation. need_auth = Authorization migrate_options = Migration Options @@ -3290,6 +3293,13 @@ config.disable_gravatar = Disable Gravatar config.enable_federated_avatar = Enable Federated Avatars config.open_with_editor_app_help = The "Open with" editors for the clone menu. If left empty, the default will be used. Expand to see the default. +config.repository_config = Repository Configuration +config.enable_size_limit = Enable Size Limit +config.repo_size_limit = Default Repository Size Limit +config.invalid_repo_size = Invalid repository size %s +config.save_repo_size_setting_failed = Failed to save global repository settings %s +config.repository_setting_success = Global repository setting has been updated + config.git_config = Git Configuration config.git_disable_diff_highlight = Disable Diff Syntax Highlight config.git_max_diff_lines = Max Diff Lines (for a single file) diff --git a/routers/api/v1/repo/repo.go b/routers/api/v1/repo/repo.go index 1bcec8fcf7e72..2bff0b66fea77 100644 --- a/routers/api/v1/repo/repo.go +++ b/routers/api/v1/repo/repo.go @@ -258,6 +258,7 @@ func CreateUserRepo(ctx *context.APIContext, owner *user_model.User, opt api.Cre TrustModel: repo_model.ToTrustModel(opt.TrustModel), IsTemplate: opt.Template, ObjectFormatName: opt.ObjectFormatName, + SizeLimit: opt.SizeLimit, }) if err != nil { if repo_model.IsErrRepoAlreadyExist(err) { @@ -743,6 +744,10 @@ func updateBasicProperties(ctx 
*context.APIContext, opts api.EditRepoOption) err repo.DefaultBranch = *opts.DefaultBranch } + if opts.SizeLimit != nil { + repo.SizeLimit = *opts.SizeLimit + } + if err := repo_service.UpdateRepository(ctx, repo, visibilityChanged); err != nil { ctx.Error(http.StatusInternalServerError, "UpdateRepository", err) return err diff --git a/routers/private/hook_pre_receive.go b/routers/private/hook_pre_receive.go index 73fe9b886cff8..f4aaa207bedf1 100644 --- a/routers/private/hook_pre_receive.go +++ b/routers/private/hook_pre_receive.go @@ -7,6 +7,11 @@ import ( "fmt" "net/http" "os" + "path/filepath" + "strconv" + "strings" + "sync" + "time" "code.gitea.io/gitea/models" asymkey_model "code.gitea.io/gitea/models/asymkey" @@ -16,6 +21,7 @@ import ( access_model "code.gitea.io/gitea/models/perm/access" "code.gitea.io/gitea/models/unit" user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/private" @@ -101,8 +107,273 @@ func (ctx *preReceiveContext) AssertCreatePullRequest() bool { return true } +// calculateSizeOfObject calculates the size of one git object via git cat-file -s command +func calculateSizeOfObject(ctx *gitea_context.PrivateContext, opts *git.RunOpts, objectID string) (objectSize int64) { + objectSizeStr, _, err := git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(objectID).RunStdString(opts) + if err != nil { + log.Trace("CalculateSizeOfRemovedObjects: Error during git cat-file -s on object: %s", objectID) + return objectSize + } + + objectSize, parseErr := strconv.ParseInt(strings.TrimSpace(objectSizeStr), 10, 64) + if parseErr != nil { + log.Trace("CalculateSizeOfRemovedObjects: Error during ParseInt on string '%s'", objectSizeStr) + return 0 + } + return objectSize +} + +// calculateSizeOfObjectsFromCache calculates the size of objects added and removed from the repository by new push +// it uses data that was cached about the 
repository for this run +func calculateSizeOfObjectsFromCache(newCommitObjects, oldCommitObjects, otherCommitObjects map[string]bool, commitObjectsSizes map[string]int64) (addedSize, removedSize int64) { + // Calculate size of objects that were added + for objectID := range newCommitObjects { + if _, exists := oldCommitObjects[objectID]; !exists { + // objectID is not referenced in the list of objects of old commit so it is a new object + // Calculate its size and add it to the addedSize + addedSize += commitObjectsSizes[objectID] + } + // We might check here if new object is not already in the rest of repo to be precise + // However our goal is to prevent growth of repository so on determination of addedSize + // We can skip this preciseness, addedSize will be more then real addedSize + // TODO - do not count size of object that is referenced in other part of repo but not referenced neither in old nor new commit + // git will not add the object twice + } + + // Calculate size of objects that were removed + for objectID := range oldCommitObjects { + if _, exists := newCommitObjects[objectID]; !exists { + // objectID is not referenced in the list of new commit objects so it was possibly removed + if _, exists := otherCommitObjects[objectID]; !exists { + // objectID is not referenced in rest of the objects of the repository so it was removed + // Calculate its size and add it to the removedSize + removedSize += commitObjectsSizes[objectID] + } + } + } + return addedSize, removedSize +} + +// convertObjectsToMap takes a newline-separated string of git objects and +// converts it into a map for efficient lookup. 
+func convertObjectsToMap(objects string) map[string]bool { + objectsMap := make(map[string]bool) + for _, object := range strings.Split(objects, "\n") { + if len(object) == 0 { + continue + } + objectID := strings.Split(object, " ")[0] + objectsMap[objectID] = true + } + return objectsMap +} + +// convertObjectsToSlice converts a list of hashes in a string from the git rev-list --objects command to a slice of string objects +func convertObjectsToSlice(objects string) (objectIDs []string) { + for _, object := range strings.Split(objects, "\n") { + if len(object) == 0 { + continue + } + objectID := strings.Split(object, " ")[0] + objectIDs = append(objectIDs, objectID) + } + return objectIDs +} + +// loadObjectSizesFromPack access all packs that this push or repo has +// and load compressed object size in bytes into objectSizes map +// using `git verify-pack -v` output +func loadObjectSizesFromPack(ctx *gitea_context.PrivateContext, opts *git.RunOpts, objectIDs []string, objectsSizes map[string]int64) error { + // Find the path from GIT_QUARANTINE_PATH environment variable (path to the pack file) + var packPath string + for _, envVar := range opts.Env { + split := strings.SplitN(envVar, "=", 2) + if split[0] == "GIT_QUARANTINE_PATH" { + packPath = split[1] + break + } + } + + // if no quarantinPath determined we silently ignore + if packPath == "" { + log.Trace("GIT_QUARANTINE_PATH not found in the environment variables. 
Will read the pack files from main repo instead") + packPath = filepath.Join(ctx.Repo.Repository.RepoPath(), "./objects/") + } + log.Warn("packPath: %s", packPath) + + // Find all pack files *.idx in the quarantine directory + packFiles, err := filepath.Glob(filepath.Join(packPath, "./pack/*.idx")) + // if pack file not found we silently ignore + if err != nil { + log.Trace("Error during finding pack files %s: %v", filepath.Join(packPath, "./pack/*.idx"), err) + } + + // Loop over each pack file + i := 0 + for _, packFile := range packFiles { + log.Trace("Processing packfile %s", packFile) + // Extract and store in cache objectsSizes the sizes of the object parsing output of the `git verify-pack` command + output, _, err := git.NewCommand(ctx, "verify-pack", "-v").AddDynamicArguments(packFile).RunStdString(opts) + if err != nil { + log.Trace("Error during git verify-pack on pack file: %s", packFile) + continue + } + + // Parsing the output of the git verify-pack command + lines := strings.Split(output, "\n") + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) < 4 { + continue + } + + // Second field has object type + // If object type is not known filter it out and do not process + objectType := fields[1] + if objectType != "commit" && objectType != "tree" && objectType != "blob" && objectType != "tag" { + continue + } + + // First field would have an object hash + objectID := fields[0] + + // Forth field would have an object compressed size + size, err := strconv.ParseInt(fields[3], 10, 64) + if err != nil { + log.Trace("Failed to parse size for object %s: %v", objectID, err) + continue + } + i++ + objectsSizes[objectID] = size + } + } + + log.Trace("Loaded %d items from packfiles", i) + return nil +} + +// loadObjectsSizesViaCatFile uses hashes from objectIDs and runs `git cat-file -s` in 10 workers to return each object sizes +// Objects for which size is already loaded are skipped +// can't use `git cat-file --batch-check` here as 
it only provides data from git DB before the commit applied and has no knowledge on new commit objects +func loadObjectsSizesViaCatFile(ctx *gitea_context.PrivateContext, opts *git.RunOpts, objectIDs []string, objectsSizes map[string]int64) error { + // This is the number of workers that will simultaneously process CalculateSizeOfObject. + const numWorkers = 10 + + var wg sync.WaitGroup + var mu sync.Mutex + + // Prepare numWorker slices to store the work + reducedObjectIDs := make([][]string, numWorkers) + for i := 0; i < numWorkers; i++ { + reducedObjectIDs[i] = make([]string, 0, len(objectIDs)/numWorkers+1) + } + + // Loop over all objectIDs and find which ones are missing size information + i := 0 + for _, objectID := range objectIDs { + _, exists := objectsSizes[objectID] + + // If object doesn't yet have size in objectsSizes add it for further processing + if !exists { + reducedObjectIDs[i%numWorkers] = append(reducedObjectIDs[i%numWorkers], objectID) + i++ + } + } + + // Start workers and determine size using `git cat-file -s` store in objectsSizes cache + for w := 1; w <= numWorkers; w++ { + wg.Add(1) + go func(reducedObjectIDs *[]string) { + defer wg.Done() + for _, objectID := range *reducedObjectIDs { + ctx := ctx + // Create a copy of opts to allow change of the Env property + tsopts := *opts + // Ensure that each worker has its own copy of the Env environment to prevent races + tsopts.Env = append([]string(nil), opts.Env...) + objectSize := calculateSizeOfObject(ctx, &tsopts, objectID) + mu.Lock() // Protecting shared resource + objectsSizes[objectID] = objectSize + mu.Unlock() // Releasing shared resource for other goroutines + } + }(&reducedObjectIDs[(w-1)%numWorkers]) + } + + // Wait for all workers to finish processing. 
+ wg.Wait() + + return nil +} + +// loadObjectsSizesViaBatch uses hashes from objectIDs and uses pre-opened `git cat-file --batch-check` command to slice and return each object sizes +// This function can't be used for new commit objects. +// It speeds up loading object sizes from existing git database of the repository avoiding +// multiple `git cat-files -s` +func loadObjectsSizesViaBatch(ctx *gitea_context.PrivateContext, repoPath string, objectIDs []string, objectsSizes map[string]int64) error { + var i int32 + + reducedObjectIDs := make([]string, 0, len(objectIDs)) + + // Loop over all objectIDs and find which ones are missing size information + for _, objectID := range objectIDs { + _, exists := objectsSizes[objectID] + + // If object doesn't yet have size in objectsSizes add it for further processing + if !exists { + reducedObjectIDs = append(reducedObjectIDs, objectID) + } + } + + wr, rd, cancel := git.CatFileBatchCheck(ctx, repoPath) + defer cancel() + + for _, commitID := range reducedObjectIDs { + _, err := wr.Write([]byte(commitID + "\n")) + if err != nil { + return err + } + i++ + line, err := rd.ReadString('\n') + if err != nil { + return err + } + if len(line) == 1 { + line, err = rd.ReadString('\n') + if err != nil { + return err + } + } + fields := strings.Fields(line) + if len(fields) != 3 { + log.Trace("String '%s' does not contain size ignored %s: %v", line, commitID, err) + continue + } + objectID := fields[0] + sizeStr := fields[2] + size, err := parseSize(sizeStr) + if err != nil { + log.Trace("String '%s' Failed to parse size for object %s: %v", line, objectID, err) + continue + } + objectsSizes[objectID] = size + } + + return nil +} + +// parseSize parses the object size from a string +func parseSize(sizeStr string) (int64, error) { + size, err := strconv.ParseInt(sizeStr, 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse object size: %w", err) + } + return size, nil +} + // HookPreReceive checks whether a 
individual commit is acceptable func HookPreReceive(ctx *gitea_context.PrivateContext) { + startTime := time.Now() + opts := web.GetForm(ctx).(*private.HookOptions) ourCtx := &preReceiveContext{ @@ -111,12 +382,164 @@ func HookPreReceive(ctx *gitea_context.PrivateContext) { opts: opts, } + repo := ourCtx.Repo.Repository + + var addedSize int64 + var removedSize int64 + var isRepoOversized bool + var pushSize *git.CountObject + var repoSize *git.CountObject + var err error + var duration time.Duration + + if repo.IsRepoSizeLimitEnabled() { + + // Calculating total size of the repo using `git count-objects` + repoSize, err = git.CountObjects(ctx, repo.RepoPath()) + if err != nil { + log.Error("Unable to get repository size with env %v: %s Error: %v", repo.RepoPath(), ourCtx.env, err) + ctx.JSON(http.StatusInternalServerError, map[string]interface{}{ + "err": err.Error(), + }) + return + } + + // Calculating total size of the push using `git count-objects` + pushSize, err = git.CountObjectsWithEnv(ctx, repo.RepoPath(), ourCtx.env) + if err != nil { + log.Error("Unable to get push size with env %v: %s Error: %v", repo.RepoPath(), ourCtx.env, err) + ctx.JSON(http.StatusInternalServerError, map[string]interface{}{ + "err": err.Error(), + }) + return + } + + // Cache whether the repository would breach the size limit after the operation + isRepoOversized = repo.IsRepoSizeOversized(pushSize.Size + pushSize.SizePack) + log.Warn("Push counts %+v", pushSize) + log.Warn("Repo counts %+v", repoSize) + } + // Iterate across the provided old commit IDs for i := range opts.OldCommitIDs { oldCommitID := opts.OldCommitIDs[i] newCommitID := opts.NewCommitIDs[i] refFullName := opts.RefFullNames[i] + log.Trace("Processing old commit: %s, new commit: %s, ref: %s", oldCommitID, newCommitID, refFullName) + + // If operation is in potential breach of size limit prepare data for analysis + if isRepoOversized { + var gitObjects string + var error error + + // Create cache of objects in old 
commit + // if oldCommitID all 0 then it's a fresh repository on gitea server and all git operations on such oldCommitID would fail + if oldCommitID != "0000000000000000000000000000000000000000" { + gitObjects, _, err = git.NewCommand(ctx, "rev-list", "--objects").AddDynamicArguments(oldCommitID).RunStdString(&git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}) + if err != nil { + log.Error("Unable to list objects in old commit: %s in %-v Error: %v", oldCommitID, repo, err) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to list objects in old commit: %v", err), + }) + return + } + } + + commitObjectsSizes := make(map[string]int64) + oldCommitObjects := convertObjectsToMap(gitObjects) + objectIDs := convertObjectsToSlice(gitObjects) + + // Create cache of objects that are in the repository but not part of old or new commit + // if oldCommitID all 0 then it's a fresh repository on gitea server and all git operations on such oldCommitID would fail + if oldCommitID == "0000000000000000000000000000000000000000" { + gitObjects, _, err = git.NewCommand(ctx, "rev-list", "--objects", "--all").AddDynamicArguments("^" + newCommitID).RunStdString(&git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}) + if err != nil { + log.Error("Unable to list objects in the repo that are missing from both old %s and new %s commits in %-v Error: %v", oldCommitID, newCommitID, repo, err) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to list objects missing from both old and new commits: %v", err), + }) + return + } + } else { + gitObjects, _, err = git.NewCommand(ctx, "rev-list", "--objects", "--all").AddDynamicArguments("^"+oldCommitID, "^"+newCommitID).RunStdString(&git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}) + if err != nil { + log.Error("Unable to list objects in the repo that are missing from both old %s and new %s commits in %-v Error: %v", oldCommitID, newCommitID, repo, err) + 
ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to list objects missing from both old and new commits: %v", err), + }) + return + } + + } + + otherCommitObjects := convertObjectsToMap(gitObjects) + objectIDs = append(objectIDs, convertObjectsToSlice(gitObjects)...) + // Unfortunately `git cat-file --batch-check` shows full object size + // so we would load compressed sizes from pack file via `git verify-pack -v` if there are pack files in repo + // The result would still miss items that are loose as individual objects (not part of pack files) + if repoSize.InPack > 0 { + error = loadObjectSizesFromPack(ctx, &git.RunOpts{Dir: repo.RepoPath(), Env: nil}, objectIDs, commitObjectsSizes) + if error != nil { + log.Error("Unable to get sizes of objects from the pack in %-v Error: %v", repo, error) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to get sizes of objects in repo: %v", error), + }) + return + } + } + + // Load loose objects that are missing + error = loadObjectsSizesViaBatch(ctx, repo.RepoPath(), objectIDs, commitObjectsSizes) + if error != nil { + log.Error("Unable to get sizes of objects that are missing in both old %s and new commits %s in %-v Error: %v", oldCommitID, newCommitID, repo, error) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to get sizes of objects missing in both old and new commit and those in old commit: %v", error), + }) + return + } + + // Create cache of objects in new commit + gitObjects, _, err = git.NewCommand(ctx, "rev-list", "--objects").AddDynamicArguments(newCommitID).RunStdString(&git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}) + if err != nil { + log.Error("Unable to list objects in new commit %s in %-v Error: %v", newCommitID, repo, err) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to list objects in new commit: %v", err), + }) + return + } + + newCommitObjects := 
convertObjectsToMap(gitObjects) + objectIDs = convertObjectsToSlice(gitObjects) + // Unfortunately `git cat-file --batch-check` doesn't work on objects not yet accepted into git database + // so the sizes will be calculated through pack file `git verify-pack -v` if there are pack files + // The result would still miss items that were sent loose as individual objects (not part of pack files) + if pushSize.InPack > 0 { + error = loadObjectSizesFromPack(ctx, &git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}, objectIDs, commitObjectsSizes) + if error != nil { + log.Error("Unable to get sizes of objects from the pack in new commit %s in %-v Error: %v", newCommitID, repo, error) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to get sizes of objects in new commit: %v", error), + }) + return + } + } + + // After loading everything we could from pack file, objects could have been sent as loose bunch as well + // We need to load them individually with `git cat-file -s` on any object that is missing from accumulated size cache commitObjectsSizes + error = loadObjectsSizesViaCatFile(ctx, &git.RunOpts{Dir: repo.RepoPath(), Env: ourCtx.env}, objectIDs, commitObjectsSizes) + if error != nil { + log.Error("Unable to get sizes of objects in new commit %s in %-v Error: %v", newCommitID, repo, error) + ctx.JSON(http.StatusInternalServerError, private.Response{ + Err: fmt.Sprintf("Fail to get sizes of objects in new commit: %v", error), + }) + return + } + + // Calculate size that was added and removed by the new commit + addedSize, removedSize = calculateSizeOfObjectsFromCache(newCommitObjects, oldCommitObjects, otherCommitObjects, commitObjectsSizes) + } + switch { case refFullName.IsBranch(): preReceiveBranch(ourCtx, oldCommitID, newCommitID, refFullName) @@ -132,6 +555,20 @@ func HookPreReceive(ctx *gitea_context.PrivateContext) { } } + if repo.IsRepoSizeLimitEnabled() { + duration = time.Since(startTime) + log.Warn("During size checking - 
Addition in size is: %d, removal in size is: %d, limit size: %d, push size: %d, repo size: %d. Took %s seconds.", addedSize, removedSize, repo.GetActualSizeLimit(), pushSize.Size+pushSize.SizePack, repo.GitSize, duration) + } + + // If total of commits add more size than they remove and we are in a potential breach of size limit -- abort + if (addedSize > removedSize) && isRepoOversized { + log.Warn("Forbidden: new repo size %s would be over limitation of %s. Push size: %s. Took %s seconds. addedSize: %s. removedSize: %s", base.FileSize(repo.GitSize+addedSize-removedSize), base.FileSize(repo.GetActualSizeLimit()), base.FileSize(pushSize.Size+pushSize.SizePack), duration, base.FileSize(addedSize), base.FileSize(removedSize)) + ctx.JSON(http.StatusForbidden, private.Response{ + UserMsg: fmt.Sprintf("New repository size is over limitation of %s", base.FileSize(repo.GetActualSizeLimit())), + }) + return + } + ctx.PlainText(http.StatusOK, "ok") } diff --git a/routers/web/admin/repos.go b/routers/web/admin/repos.go index e7c27145dce9d..94995d40526eb 100644 --- a/routers/web/admin/repos.go +++ b/routers/web/admin/repos.go @@ -16,8 +16,10 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/web" "code.gitea.io/gitea/routers/web/explore" "code.gitea.io/gitea/services/context" + "code.gitea.io/gitea/services/forms" repo_service "code.gitea.io/gitea/services/repository" ) @@ -31,6 +33,9 @@ func Repos(ctx *context.Context) { ctx.Data["Title"] = ctx.Tr("admin.repositories") ctx.Data["PageIsAdminRepositories"] = true + ctx.Data["EnableSizeLimit"] = setting.EnableSizeLimit + ctx.Data["RepoSizeLimit"] = base.FileSize(setting.RepoSizeLimit) + explore.RenderRepoSearch(ctx, &explore.RepoSearchOptions{ Private: true, PageSize: setting.UI.Admin.RepoPagingNum, @@ -39,6 +44,54 @@ func Repos(ctx *context.Context) { }) } +func UpdateRepoPost(ctx *context.Context) { + temp := web.GetForm(ctx) + 
if temp == nil { + ctx.Data["Err_Repo_Size_Limit"] = "" + explore.RenderRepoSearch(ctx, &explore.RepoSearchOptions{ + Private: true, + PageSize: setting.UI.Admin.RepoPagingNum, + TplName: tplRepos, + OnlyShowRelevant: false, + }) + return + } + form := temp.(*forms.UpdateGlobalRepoFrom) + ctx.Data["Title"] = ctx.Tr("admin.repositories") + ctx.Data["PageIsAdminRepositories"] = true + + repoSizeLimit, err := base.GetFileSize(form.RepoSizeLimit) + + ctx.Data["EnableSizeLimit"] = form.EnableSizeLimit + ctx.Data["RepoSizeLimit"] = form.RepoSizeLimit + + if err != nil { + ctx.Data["Err_Repo_Size_Limit"] = err.Error() + explore.RenderRepoSearch(ctx, &explore.RepoSearchOptions{ + Private: true, + PageSize: setting.UI.Admin.RepoPagingNum, + TplName: tplRepos, + OnlyShowRelevant: false, + }) + return + } + + err = setting.SaveGlobalRepositorySetting(form.EnableSizeLimit, repoSizeLimit) + if err != nil { + ctx.Data["Err_Repo_Size_Save"] = err.Error() + explore.RenderRepoSearch(ctx, &explore.RepoSearchOptions{ + Private: true, + PageSize: setting.UI.Admin.RepoPagingNum, + TplName: tplRepos, + OnlyShowRelevant: false, + }) + return + } + + ctx.Flash.Success(ctx.Tr("admin.config.repository_setting_success")) + ctx.Redirect(setting.AppSubURL + "/admin/repos") +} + // DeleteRepo delete one repository func DeleteRepo(ctx *context.Context) { repo, err := repo_model.GetRepositoryByID(ctx, ctx.FormInt64("id")) diff --git a/routers/web/admin/repos_test.go b/routers/web/admin/repos_test.go new file mode 100644 index 0000000000000..17594fad2e3ae --- /dev/null +++ b/routers/web/admin/repos_test.go @@ -0,0 +1,26 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package admin + +import ( + "testing" + + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/services/contexttest" + + "github.com/stretchr/testify/assert" +) + +func TestUpdateRepoPost(t *testing.T) { + unittest.PrepareTestEnv(t) + ctx, _ := contexttest.MockContext(t, "admin/repos") + contexttest.LoadUser(t, ctx, 1) + + ctx.Req.Form.Set("enable_size_limit", "on") + ctx.Req.Form.Set("repo_size_limit", "222 kcmcm") + + UpdateRepoPost(ctx) + + assert.NotEmpty(t, ctx.Flash.ErrorMsg) +} diff --git a/routers/web/explore/repo.go b/routers/web/explore/repo.go index 62090e5bf4dba..9d6b78fbe3b5f 100644 --- a/routers/web/explore/repo.go +++ b/routers/web/explore/repo.go @@ -160,6 +160,18 @@ func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) { } ctx.Data["Page"] = pager + if ctx.Data["Err_Repo_Size_Limit"] != nil { + ctx.RenderWithErr(ctx.Tr("admin.config.invalid_repo_size", ctx.Data["Err_Repo_Size_Limit"].(string)), + opts.TplName, nil) + return + } + + if ctx.Data["Err_Repo_Size_Save"] != nil { + ctx.RenderWithErr(ctx.Tr("admin.config.save_repo_size_setting_failed", ctx.Data["Err_Repo_Size_Save"].(string)), + opts.TplName, nil) + return + } + ctx.HTML(http.StatusOK, opts.TplName) } diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index f7a7cb5d38942..2921557ba6a11 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -293,6 +293,7 @@ func CreatePost(ctx *context.Context) { IsTemplate: form.Template, TrustModel: repo_model.DefaultTrustModel, ObjectFormatName: form.ObjectFormatName, + SizeLimit: form.SizeLimit, }) if err == nil { log.Trace("Repository created [%d]: %s/%s", repo.ID, ctxUser.Name, repo.Name) diff --git a/routers/web/repo/setting/setting.go b/routers/web/repo/setting/setting.go index 485bd927fa932..114a310e75fd4 100644 --- a/routers/web/repo/setting/setting.go +++ b/routers/web/repo/setting/setting.go @@ -61,6 +61,9 @@ func SettingsCtxData(ctx *context.Context) { 
ctx.Data["DisableNewPushMirrors"] = setting.Mirror.DisableNewPush ctx.Data["DefaultMirrorInterval"] = setting.Mirror.DefaultInterval ctx.Data["MinimumMirrorInterval"] = setting.Mirror.MinInterval + ctx.Data["Err_RepoSize"] = ctx.Repo.Repository.IsRepoSizeOversized(ctx.Repo.Repository.GetActualSizeLimit() / 10) // less than 10% left + ctx.Data["ActualSizeLimit"] = ctx.Repo.Repository.GetActualSizeLimit() + ctx.Data["EnableSizeLimit"] = setting.EnableSizeLimit signing, _ := asymkey_service.SigningKey(ctx, ctx.Repo.Repository.RepoPath()) ctx.Data["SigningKeyAvailable"] = len(signing) > 0 @@ -113,6 +116,7 @@ func SettingsPost(ctx *context.Context) { ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled repo := ctx.Repo.Repository + ctx.Data["Err_RepoSize"] = repo.IsRepoSizeOversized(repo.SizeLimit / 10) // less than 10% left switch ctx.FormString("action") { case "update": @@ -170,6 +174,19 @@ func SettingsPost(ctx *context.Context) { form.Private = repo.BaseRepo.IsPrivate || repo.BaseRepo.Owner.Visibility == structs.VisibleTypePrivate } + if form.RepoSizeLimit < 0 { + ctx.Data["Err_RepoSizeLimit"] = true + ctx.RenderWithErr(ctx.Tr("repo.form.repo_size_limit_negative"), tplSettingsOptions, &form) + return + } + + if !ctx.Doer.IsAdmin && repo.SizeLimit != form.RepoSizeLimit { + ctx.Data["Err_RepoSizeLimit"] = true + ctx.RenderWithErr(ctx.Tr("repo.form.repo_size_limit_only_by_admins"), tplSettingsOptions, &form) + return + } + repo.SizeLimit = form.RepoSizeLimit + if err := repo_service.UpdateRepository(ctx, repo, false); err != nil { ctx.ServerError("UpdateRepository", err) return diff --git a/routers/web/web.go b/routers/web/web.go index 41b019e4b59e5..40b6208210a71 100644 --- a/routers/web/web.go +++ b/routers/web/web.go @@ -736,6 +736,7 @@ func registerRoutes(m *web.Router) { m.Get("", admin.Repos) m.Combo("/unadopted").Get(admin.UnadoptedRepos).Post(admin.AdoptOrDeleteRepository) m.Post("/delete", admin.DeleteRepo) + m.Post("", 
web.Bind(forms.UpdateGlobalRepoFrom{}), admin.UpdateRepoPost) }) m.Group("/packages", func() { diff --git a/services/forms/repo_form.go b/services/forms/repo_form.go index a2c2af3facdd0..75b6d480db0b9 100644 --- a/services/forms/repo_form.go +++ b/services/forms/repo_form.go @@ -21,6 +21,12 @@ import ( "gitea.com/go-chi/binding" ) +// UpdateGlobalRepoFrom for updating global repository setting +type UpdateGlobalRepoFrom struct { + RepoSizeLimit string + EnableSizeLimit bool +} + // CreateRepoForm form for creating repository type CreateRepoForm struct { UID int64 `binding:"Required"` @@ -46,6 +52,7 @@ type CreateRepoForm struct { ForkSingleBranch string ObjectFormatName string + SizeLimit int64 } // Validate validates the fields @@ -128,6 +135,7 @@ type RepoSettingForm struct { Private bool Template bool EnablePrune bool + RepoSizeLimit int64 // Advanced settings EnableCode bool diff --git a/services/repository/create.go b/services/repository/create.go index 971793bcc6e35..0f4940dff2d81 100644 --- a/services/repository/create.go +++ b/services/repository/create.go @@ -45,6 +45,7 @@ type CreateRepoOptions struct { TrustModel repo_model.TrustModelType MirrorInterval string ObjectFormatName string + SizeLimit int64 } func prepareRepoCommit(ctx context.Context, repo *repo_model.Repository, tmpDir, repoPath string, opts CreateRepoOptions) error { @@ -234,6 +235,7 @@ func CreateRepositoryDirectly(ctx context.Context, doer, u *user_model.User, opt Status: opts.Status, IsEmpty: !opts.AutoInit, TrustModel: opts.TrustModel, + SizeLimit: opts.SizeLimit, IsMirror: opts.IsMirror, DefaultBranch: opts.DefaultBranch, DefaultWikiBranch: setting.Repository.DefaultBranch, diff --git a/templates/admin/repo/list.tmpl b/templates/admin/repo/list.tmpl index 69031e42ebe76..3c87cd8d16d1d 100644 --- a/templates/admin/repo/list.tmpl +++ b/templates/admin/repo/list.tmpl @@ -1,5 +1,26 @@ {{template "admin/layout_head" (dict "ctxData" . "pageClass" "admin")}}