Skip to content

Commit

Permalink
Implements LRU cache for git repos on disk (#17)
Browse files Browse the repository at this point in the history
The GitRepoLRUCache struct and its associated methods implement a Least Recently
Used cache whose main element is the GitRepoFilePath struct (which
itself represents an on-disk git repository).

This also adds the git "providers" interface, which wraps the methods for cloning
and loading up repos from the different implementers.

Signed-off-by: John McBride <[email protected]>
  • Loading branch information
jpmcb authored Jul 20, 2023
1 parent 34765a9 commit 51ed9ca
Show file tree
Hide file tree
Showing 13 changed files with 1,036 additions and 35 deletions.
23 changes: 21 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
# This file is useful for doing local development
# when needing to load the postgres database secrets.
# when needing to load the postgres database secrets and start a locally running
# pizza oven service

# Database env vars
DATABASE_PORT=9999
DATABASE_HOST=localhost
DATABASE_USER=opensauced-admin
DATABASE_PASSWORD={YOUR-SECRET-PASSWORD-HERE}
DATABASE_DBNAME=pizza

# The port for the Pizza oven server
# The port for the Pizza oven server to use
SERVER_PORT=8080

# The git provider to use for the pizza oven service.
# Must be one of "cache" or "memory" to designate the git provider that will be
# used to clone and access repos.
# - The "cache" git provider uses a local cache on disk to clone git repos into.
# This uses much less memory than in-memory cloning.
# - The "memory" git provider clones git repos in memory instead of onto disk.
GIT_PROVIDER=cache

# The settings for the cached git repos.
# Must be set when "GIT_PROVIDER" is set to "cache"
#
# The root directory where the git repo cache should be stored
CACHE_DIR=/tmp
# The minimum amount of free disk space, in GB, to keep. This ensures that the cache
# does not completely fill the disk and allows for some buffer before items
# are evicted from the cache.
MIN_FREE_DISK_GB=25
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ This will start the go app, connect to your local postgres database
using your `.env` file or existing environment variables,
and start accepting requests.

See the `.env.example` file to see what environment variables are expected.

### Local kubernetes setup

To get a local environment setup with a postgres database without having to start and configure one yourself,
Expand Down
19 changes: 16 additions & 3 deletions hack/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ metadata:
spec:
teamId: "opensauced"
volume:
size: 2Gi
size: 25Gi
numberOfInstances: 1
users:
# The database owner/admin for the pizza database
Expand Down Expand Up @@ -157,8 +157,20 @@ spec:
value: "pizza"
- name: SERVER_PORT
value: "8080"
- name: GIT_PROVIDER
value: "cache"
- name: CACHE_DIR
value: "/data/cache"
- name: MIN_FREE_DISK_GB
value: "25"
ports:
- containerPort: 8080
volumeMounts:
- name: pizza-cache
mountPath: /data/cache
volumes:
- name: pizza-cache
emptyDir: {}
EOF

# The pod may take a second to be able to be waited on via kubectl
Expand Down Expand Up @@ -199,9 +211,10 @@ echo "Opening port to postgres operator to apply database migrations"
echo
forward_postgres_port &

# Sleep for abit so the postgres database has time to initialize and
# Wait for the postgres cluster to come up and be ready to accept requests
# be ready to accept requests and incoming queries
sleep 10
sleep 3
kubectl wait --for=jsonpath='{.status.PostgresClusterStatus}'=Running postgresqls/opensauced-pizza-postgres-cluster

# apply the migrations to the database
echo
Expand Down
66 changes: 63 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,20 +1,46 @@
package main

import (
"flag"
"log"
"os"
"strconv"

"github.com/joho/godotenv"
"go.uber.org/zap"

"github.com/open-sauced/pizza/oven/pkg/database"
"github.com/open-sauced/pizza/oven/pkg/providers"
"github.com/open-sauced/pizza/oven/pkg/server"
)

func main() {
var logger *zap.Logger
var err error

// Initialize & parse flags
debugMode := flag.Bool("debug", false, "run in debug mode")
flag.Parse()

if *debugMode {
logger, err = zap.NewDevelopment()
if err != nil {
log.Fatalf("Could not initiate debug zap logger: %v", err)
}
} else {
logger, err = zap.NewProduction()
if err != nil {
log.Fatalf("Could not initiate production zap logger: %v", err)
}
}

sugarLogger := logger.Sugar()
sugarLogger.Infof("initiated zap logger with level: %d", sugarLogger.Level())

// Load the environment variables from the .env file
err := godotenv.Load()
err = godotenv.Load()
if err != nil {
log.Printf("Failed to load the dot env file. Continuing with existing environment: %v", err)
sugarLogger.Warnf("Failed to load the dot env file. Continuing with existing environment: %v", err)
}

// Envs for the pizza oven database handler
Expand All @@ -27,7 +53,41 @@ func main() {
// Env vars for the pizza oven server
serverPort := os.Getenv("SERVER_PORT")

// The user specifies which git provider to use
gitProvider := os.Getenv("GIT_PROVIDER")

// Initialize the database handler
pizzaOven := database.NewPizzaOvenDbHandler(databaseHost, databasePort, databaseUser, databasePwd, databaseDbName)
pizzaOvenServer := server.NewPizzaOvenServer(pizzaOven)

var pizzaGitProvider providers.GitRepoProvider
switch gitProvider {
case "cache":
sugarLogger.Infof("Initiating cache git provider")

// Env vars for the git provider
cacheDir := os.Getenv("CACHE_DIR")
minFreeDisk := os.Getenv("MIN_FREE_DISK_GB")

// Validates the provided minimum free disk int is parsable as a uint64
//
// TODO - should dynamically check file system bit size after compilation.
// 64 bit wide words should be fine for almost all use cases for now.
minFreeDiskUint64, err := strconv.ParseUint(minFreeDisk, 10, 64)
if err != nil {
sugarLogger.Fatalf(": %s", err.Error())
}

pizzaGitProvider, err = providers.NewLRUCacheGitRepoProvider(cacheDir, minFreeDiskUint64, sugarLogger)
if err != nil {
sugarLogger.Fatalf("Could not create a cache git provider: %s", err.Error())
}
case "memory":
sugarLogger.Infof("Initiating in-memory git provider")
pizzaGitProvider = providers.NewInMemoryGitRepoProvider(sugarLogger)
default:
sugarLogger.Fatal("must specify the GIT_PROVIDER env variable (i.e. cache, memory)")
}

pizzaOvenServer := server.NewPizzaOvenServer(pizzaOven, pizzaGitProvider, sugarLogger)
pizzaOvenServer.Run(serverPort)
}
63 changes: 63 additions & 0 deletions pkg/cache/gitrepofilepath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package cache

import (
"errors"
"sync"

"github.com/go-git/go-git/v5"
)

// GitRepoFilePath is a key / value pair guarded by a mutex. The key identifies
// a git repository (typically its remote URL) and the value is the file path
// of that repository's clone on disk.
// This is used as the primary element in GitRepoLRUCache.
//
// When processing and operations are completed for an individual GitRepoFilePath,
// always call "Done" to ensure no deadlocks occur on individual elements within
// a given GitRepoLRUCache.
// Example: "repo.Done()"
//
// Because the struct embeds a sync.Mutex by value it must not be copied after
// first use; always pass it around as a *GitRepoFilePath.
type GitRepoFilePath struct {
// lock ensures that the on-disk git repo is not modified while it is being
// processed.
// Locking is done manually via "element.lock.Lock()" within the cache package.
// Once operations are completed, in order to free up the resource, the "Done()"
// method should be called, which unlocks this mutex.
lock sync.Mutex

// key is the key in the GitRepoFilePath key/value pair; generally it is the
// remote URL for the git repository.
key string

// path is the value in the GitRepoFilePath key/value pair and denotes the
// filepath on-disk to the cloned git repository.
path string
}

// OpenAndFetch opens the git repository stored on disk at g.path and pulls the
// latest changes from its remote into the current worktree.
//
// A git.NoErrAlreadyUpToDate result from the pull is not treated as a failure:
// the repository is simply returned as-is. Any other error from opening the
// repository, resolving its worktree, or pulling is returned unchanged together
// with a nil repository.
//
// This method does not take g.lock itself; the caller is expected to already
// hold the lock and to release it with Done once processing is finished.
func (g *GitRepoFilePath) OpenAndFetch() (*git.Repository, error) {
	repo, err := git.PlainOpen(g.path)
	if err != nil {
		return nil, err
	}

	// The worktree is needed because a pull merges into the checked-out branch.
	w, err := repo.Worktree()
	if err != nil {
		return nil, err
	}

	// Pull with the default options and merge into the current branch.
	// errors.Is is used instead of a direct comparison so that the
	// "already up to date" sentinel is still recognised if it is ever wrapped.
	if err := w.Pull(&git.PullOptions{}); err != nil && !errors.Is(err, git.NoErrAlreadyUpToDate) {
		return nil, err
	}

	return repo, nil
}

// Done is a thin wrapper for unlocking the GitRepoFilePath's mutex.
// This should ALWAYS be called when operations and processing for this
// individual on-disk repo are completed in order to prevent a deadlock.
//
// The mutex must be locked when Done is called: unlocking an unlocked
// sync.Mutex is a run-time error, so call Done exactly once per Lock.
func (g *GitRepoFilePath) Done() {
g.lock.Unlock()
}
49 changes: 49 additions & 0 deletions pkg/cache/gitrepofilepath_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package cache

import "testing"

// TestOpenAndFetch puts each listed repo into a fresh GitRepoLRUCache rooted in
// a temporary directory, then takes the front element of the cache, locks it,
// and verifies that OpenAndFetch yields a non-nil repository without an error.
func TestOpenAndFetch(t *testing.T) {
	tests := []struct {
		name     string
		cacheDir string
		repos    []string
	}{
		{
			name:     "Puts repos into cache in sequential order",
			cacheDir: t.TempDir(),
			repos: []string{
				"https://github.com/open-sauced/pizza",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create a new LRU cache
			c, err := NewGitRepoLRUCache(tt.cacheDir, 100)
			if err != nil {
				t.Fatalf("unexpected err: %s", err.Error())
			}

			// Populate the cache with the repos, releasing each element's
			// lock as soon as it has been put.
			for _, repo := range tt.repos {
				repoFp, err := c.Put(repo)
				if err != nil {
					t.Fatalf("unexpected err putting to cache: %s", err.Error())
				}
				repoFp.Done()
			}

			// Get the first element in the cache and hold its lock for the
			// duration of the open/fetch.
			repoFp := c.dll.Front().Value.(*GitRepoFilePath)
			repoFp.lock.Lock()
			defer repoFp.Done()

			// Open and fetch the repo ensuring a non-nil git repo is returned.
			// The error and the nil-repo cases are checked separately so that
			// a nil repo with a nil error reports a failure instead of
			// panicking on err.Error().
			openedRepo, err := repoFp.OpenAndFetch()
			if err != nil {
				t.Fatalf("Opened repo unexpectedly failed to open and/or fetch: %s", err.Error())
			}
			if openedRepo == nil {
				t.Fatal("OpenAndFetch returned a nil repo without an error")
			}
		})
	}
}
Loading

0 comments on commit 51ed9ca

Please sign in to comment.