Skip to content

Commit 51ed9ca

Browse files
authored
Implements LRU cache for git repos on disk (#17)
The GitRepoLRUCache struct and its associated methods implement a Least Recently Used cache whose main element is the GitRepoFilePath struct (which itself represents an on-disk git repository). This also adds the git "providers" interface, which wraps the methods for cloning and loading repos across the different implementers. Signed-off-by: John McBride <[email protected]>
1 parent 34765a9 commit 51ed9ca

File tree

13 files changed

+1036
-35
lines changed

13 files changed

+1036
-35
lines changed

.env.example

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,30 @@
11
# This file is useful for doing local development
2-
# when needing to load the postgres database secrets.
2+
# when needing to load the postgres database secrets and start a locally running
3+
# pizza oven service
34

5+
# Database env vars
46
DATABASE_PORT=9999
57
DATABASE_HOST=localhost
68
DATABASE_USER=opensauced-admin
79
DATABASE_PASSWORD={YOUR-SECRET-PASSWORD-HERE}
810
DATABASE_DBNAME=pizza
911

10-
# The port for the Pizza oven server
12+
# The port for the Pizza oven server to use
1113
SERVER_PORT=8080
14+
15+
# The git provider to use for the pizza oven service.
16+
# Must be one of "cache" or "memory" to designate the git provider that will be
17+
# used to clone and access repos.
18+
# - The "cache" git provider uses a local cache on disk to clone git repos into.
19+
# This uses much less memory than in-memory cloning.
20+
GIT_PROVIDER=cache
21+
22+
# The settings for the cached git repos.
23+
# Must be set when "GIT_PROVIDER" is set to "cache"
24+
#
25+
# The root directory where the git repo cache should be stored
26+
CACHE_DIR=/tmp
27+
# The minimum amount of free disk space in GB to keep. This ensures that the cache
28+
# does not completely fill the disk and allows for some buffer before items
29+
# are evicted from the cache.
30+
MIN_FREE_DISK_GB=25

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ This will start the go app, connect to your local postgres database
6666
using your `.env` file or existing environment variables,
6767
and start accepting requests.
6868

69+
See the `.env.example` file to see what environment variables are expected.
70+
6971
### Local kubernetes setup
7072

7173
To get a local environment setup with a postgres database without having to start and configure one yourself,

hack/setup.sh

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ metadata:
7979
spec:
8080
teamId: "opensauced"
8181
volume:
82-
size: 2Gi
82+
size: 25Gi
8383
numberOfInstances: 1
8484
users:
8585
# The database owner/admin for the pizza database
@@ -157,8 +157,20 @@ spec:
157157
value: "pizza"
158158
- name: SERVER_PORT
159159
value: "8080"
160+
- name: GIT_PROVIDER
161+
value: "cache"
162+
- name: CACHE_DIR
163+
value: "/data/cache"
164+
- name: MIN_FREE_DISK_GB
165+
value: "25"
160166
ports:
161167
- containerPort: 8080
168+
volumeMounts:
169+
- name: pizza-cache
170+
mountPath: /data/cache
171+
volumes:
172+
- name: pizza-cache
173+
emptyDir: {}
162174
EOF
163175

164176
# The pod may take a second to be able to be waited on via kubectl
@@ -199,9 +211,10 @@ echo "Opening port to postgres operator to apply database migrations"
199211
echo
200212
forward_postgres_port &
201213

202-
# Sleep for abit so the postgres database has time to initialize and
214+
# Wait for the postgres cluster to come up and
203215
# be ready to accept requests and incoming queries
204-
sleep 10
216+
sleep 3
217+
kubectl wait --for=jsonpath='{.status.PostgresClusterStatus}'=Running postgresqls/opensauced-pizza-postgres-cluster
205218

206219
# apply the migrations to the database
207220
echo

main.go

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,46 @@
11
package main
22

33
import (
4+
"flag"
45
"log"
56
"os"
7+
"strconv"
68

79
"github.com/joho/godotenv"
10+
"go.uber.org/zap"
811

912
"github.com/open-sauced/pizza/oven/pkg/database"
13+
"github.com/open-sauced/pizza/oven/pkg/providers"
1014
"github.com/open-sauced/pizza/oven/pkg/server"
1115
)
1216

1317
func main() {
18+
var logger *zap.Logger
19+
var err error
20+
21+
// Initialize & parse flags
22+
debugMode := flag.Bool("debug", false, "run in debug mode")
23+
flag.Parse()
24+
25+
if *debugMode {
26+
logger, err = zap.NewDevelopment()
27+
if err != nil {
28+
log.Fatalf("Could not initiate debug zap logger: %v", err)
29+
}
30+
} else {
31+
logger, err = zap.NewProduction()
32+
if err != nil {
33+
log.Fatalf("Could not initiate production zap logger: %v", err)
34+
}
35+
}
36+
37+
sugarLogger := logger.Sugar()
38+
sugarLogger.Infof("initiated zap logger with level: %d", sugarLogger.Level())
39+
1440
// Load the environment variables from the .env file
15-
err := godotenv.Load()
41+
err = godotenv.Load()
1642
if err != nil {
17-
log.Printf("Failed to load the dot env file. Continuing with existing environment: %v", err)
43+
sugarLogger.Warnf("Failed to load the dot env file. Continuing with existing environment: %v", err)
1844
}
1945

2046
// Envs for the pizza oven database handler
@@ -27,7 +53,41 @@ func main() {
2753
// Env vars for the pizza oven server
2854
serverPort := os.Getenv("SERVER_PORT")
2955

56+
// The user specifies which git provider to use
57+
gitProvider := os.Getenv("GIT_PROVIDER")
58+
59+
// Initialize the database handler
3060
pizzaOven := database.NewPizzaOvenDbHandler(databaseHost, databasePort, databaseUser, databasePwd, databaseDbName)
31-
pizzaOvenServer := server.NewPizzaOvenServer(pizzaOven)
61+
62+
var pizzaGitProvider providers.GitRepoProvider
63+
switch gitProvider {
64+
case "cache":
65+
sugarLogger.Infof("Initiating cache git provider")
66+
67+
// Env vars for the git provider
68+
cacheDir := os.Getenv("CACHE_DIR")
69+
minFreeDisk := os.Getenv("MIN_FREE_DISK_GB")
70+
71+
// Validates the provided minimum free disk int is parsable as a uint64
72+
//
73+
// TODO - should dynamically check file system bit size after compilation.
74+
// 64 bit wide words should be fine for almost all use cases for now.
75+
minFreeDiskUint64, err := strconv.ParseUint(minFreeDisk, 10, 64)
76+
if err != nil {
77+
sugarLogger.Fatalf(": %s", err.Error())
78+
}
79+
80+
pizzaGitProvider, err = providers.NewLRUCacheGitRepoProvider(cacheDir, minFreeDiskUint64, sugarLogger)
81+
if err != nil {
82+
sugarLogger.Fatalf("Could not create a cache git provider: %s", err.Error())
83+
}
84+
case "memory":
85+
sugarLogger.Infof("Initiating in-memory git provider")
86+
pizzaGitProvider = providers.NewInMemoryGitRepoProvider(sugarLogger)
87+
default:
88+
sugarLogger.Fatal("must specify the GIT_PROVIDER env variable (i.e. cache, memory)")
89+
}
90+
91+
pizzaOvenServer := server.NewPizzaOvenServer(pizzaOven, pizzaGitProvider, sugarLogger)
3292
pizzaOvenServer.Run(serverPort)
3393
}

pkg/cache/gitrepofilepath.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package cache
2+
3+
import (
	"errors"
	"sync"

	"github.com/go-git/go-git/v5"
)
8+
9+
// GitRepoFilePath pairs the key of a git repository (typically its remote
// URL) with the file path of its clone on disk, guarded by a mutex. It is the
// primary element stored in a GitRepoLRUCache.
//
// Once processing of an individual GitRepoFilePath is complete, always call
// Done so that the element's mutex is released and no deadlock occurs on that
// element within its GitRepoLRUCache.
// Example: "repo.Done()"
type GitRepoFilePath struct {
	// lock ensures the on-disk git repo is not modified during processing.
	// The cache package acquires it manually via "element.lock.Lock()"; call
	// Done to release it once operations are completed.
	lock sync.Mutex

	// key identifies the repository; generally the remote URL of the git
	// repository.
	key string

	// path is the file path on disk of the cloned git repository.
	path string
}
33+
34+
// OpenAndFetch opens a git repository on-disk and fetches the latest changes.
35+
// If the git.NoErrAlreadyUpToDate error is produced, this function does not
36+
// return an error but, instead, continues and returns the repo.
37+
func (g *GitRepoFilePath) OpenAndFetch() (*git.Repository, error) {
38+
repo, err := git.PlainOpen(g.path)
39+
if err != nil {
40+
return nil, err
41+
}
42+
43+
// Get the worktree for the repository
44+
w, err := repo.Worktree()
45+
if err != nil {
46+
return nil, err
47+
}
48+
49+
// Pull the latest changes from the origin remote and merge into the current branch
50+
err = w.Pull(&git.PullOptions{})
51+
if err != nil && err != git.NoErrAlreadyUpToDate {
52+
return nil, err
53+
}
54+
55+
return repo, nil
56+
}
57+
58+
// Done is a thin wrapper for unlocking the GitRepoFilePath's mutex.
59+
// This should ALWAYS be called when operations and processing for this
60+
// individual on-disk repo are completed in order to prevent a deadlock.
61+
func (g *GitRepoFilePath) Done() {
62+
g.lock.Unlock()
63+
}

pkg/cache/gitrepofilepath_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package cache
2+
3+
import "testing"
4+
5+
func TestOpenAndFetch(t *testing.T) {
6+
tests := []struct {
7+
name string
8+
cacheDir string
9+
repos []string
10+
}{
11+
{
12+
name: "Puts repos into cache in sequential order",
13+
cacheDir: t.TempDir(),
14+
repos: []string{
15+
"https://github.com/open-sauced/pizza",
16+
},
17+
},
18+
}
19+
20+
for _, tt := range tests {
21+
t.Run(tt.name, func(t *testing.T) {
22+
// Create a new LRU cache
23+
c, err := NewGitRepoLRUCache(tt.cacheDir, 100)
24+
if err != nil {
25+
t.Fatalf("unexpected err: %s", err.Error())
26+
}
27+
28+
// Populate the cache with the repos
29+
for _, repo := range tt.repos {
30+
repoFp, err := c.Put(repo)
31+
if err != nil {
32+
t.Fatalf("unexpected err putting to cache: %s", err.Error())
33+
}
34+
repoFp.Done()
35+
}
36+
37+
// Get the first element in the cache
38+
repoFp := c.dll.Front().Value.(*GitRepoFilePath)
39+
repoFp.lock.Lock()
40+
defer repoFp.Done()
41+
42+
// Open and fetch the repo ensuring a non-nil git repo is returned
43+
openedRepo, err := repoFp.OpenAndFetch()
44+
if openedRepo == nil || err != nil {
45+
t.Fatalf("Opened repo unexpectedly failed to open and/or fetch: %s", err.Error())
46+
}
47+
})
48+
}
49+
}

0 commit comments

Comments
 (0)