Skip to content

Commit 73b26e5

Browse files
authored
Support tmpfs and a server queue for C++ compilers (#10)
This PR drops the -working-dir option and adds two new ones instead: * -cpp-dir {string}, Directory for incoming C++ files and src cache * -obj-dir {string}, Directory for resulting obj files and obj cache The directory passed as -cpp-dir can be placed in tmpfs. Also, nocc-server now manages C++ compiler launches with a waiting queue. The purpose of the waiting queue is to avoid over-utilizing server resources at peak times.
1 parent edc04d3 commit 73b26e5

28 files changed

+806
-482
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
RELEASE = v1.1.2
1+
RELEASE = v1.2
22
BUILD_COMMIT := $(shell git rev-parse --short HEAD)
33
DATE := $(shell date -u '+%F %X UTC')
44
VERSION := ${RELEASE}, rev ${BUILD_COMMIT}, compiled at ${DATE}

cmd/nocc-daemon/main.go

+6-3
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,20 @@ func main() {
9191
}
9292

9393
if *checkServersAndExit {
94-
if len(remoteNoccHosts) == 0 {
95-
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
96-
}
9794
if len(os.Args) == 3 { // nocc -check-servers {remoteHostPort}
9895
remoteNoccHosts = []string{os.Args[2]}
9996
}
97+
if len(remoteNoccHosts) == 0 {
98+
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
99+
}
100100
client.RequestRemoteStatus(remoteNoccHosts)
101101
os.Exit(0)
102102
}
103103

104104
if *dumpServerLogsAndExit {
105+
if len(os.Args) == 3 { // nocc -dump-server-logs {remoteHostPort}
106+
remoteNoccHosts = []string{os.Args[2]}
107+
}
105108
if len(remoteNoccHosts) == 0 {
106109
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
107110
}

cmd/nocc-server/main.go

+30-25
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import (
44
"fmt"
55
"net"
66
"os"
7-
"path"
7+
"runtime"
88
"time"
99

1010
"github.com/VKCOM/nocc/internal/common"
@@ -18,24 +18,29 @@ func failedStart(message string, err error) {
1818
os.Exit(1)
1919
}
2020

21-
// cleanupWorkingDir ensures that workingDir exists and is empty
21+
// prepareEmptyDir ensures that serverDir exists and is empty
2222
// it's executed on server launch
2323
// as a consequence, all file caches are lost on restart
24-
func cleanupWorkingDir(workingDir string) error {
25-
oldWorkingDir := workingDir + ".old"
26-
27-
if err := os.RemoveAll(oldWorkingDir); err != nil {
28-
failedStart("can't remove old working dir", err)
29-
}
30-
if _, err := os.Stat(workingDir); err == nil {
31-
if err := os.Rename(workingDir, oldWorkingDir); err != nil {
32-
failedStart("can't rename working dir %s to .old", err)
24+
func prepareEmptyDir(parentDir *string, subdir string) string {
25+
// if /tmp/nocc/cpp/src-cache already exists, it means, that it contains files from a previous launch
26+
// to start up as quickly as possible, do the following:
27+
// 1) rename it to /tmp/nocc/cpp/src-cache.old
28+
// 2) clear it recursively in the background
29+
serverDir := *parentDir + "/" + subdir
30+
if _, err := os.Stat(serverDir); err == nil {
31+
oldDirRenamed := fmt.Sprintf("%s.old.%d", serverDir, time.Now().Unix())
32+
if err := os.Rename(serverDir, oldDirRenamed); err != nil {
33+
failedStart("can't rename "+serverDir, err)
3334
}
35+
go func() {
36+
_ = os.RemoveAll(oldDirRenamed)
37+
}()
3438
}
35-
if err := os.MkdirAll(workingDir, os.ModePerm); err != nil {
36-
return err
39+
40+
if err := os.MkdirAll(serverDir, os.ModePerm); err != nil {
41+
failedStart("can't create "+serverDir, err)
3742
}
38-
return nil
43+
return serverDir
3944
}
4045

4146
// printDockerContainerIP is a dev/debug function called only in a special build for local Docker, used for local testing.
@@ -58,8 +63,10 @@ func main() {
5863
"host", "")
5964
listenPort := common.CmdEnvInt("Listening port, default 43210.", 43210,
6065
"port", "")
61-
workingDir := common.CmdEnvString("Directory for saving incoming files, default /tmp/nocc-server.", "/tmp/nocc-server",
62-
"working-dir", "")
66+
cppStoreDir := common.CmdEnvString("Directory for incoming C++ files and src cache, default /tmp/nocc/cpp.\nIt can be placed in tmpfs to speed up compilation", "/tmp/nocc/cpp",
67+
"cpp-dir", "")
68+
objStoreDir := common.CmdEnvString("Directory for resulting obj files and obj cache, default /tmp/nocc/obj.", "/tmp/nocc/obj",
69+
"obj-dir", "")
6370
logFileName := common.CmdEnvString("A filename to log, by default use stderr.", "",
6471
"log-filename", "")
6572
logVerbosity := common.CmdEnvInt("Logger verbosity level for INFO (-1 off, default 0, max 2).\nErrors are logged always.", 0,
@@ -70,6 +77,8 @@ func main() {
7077
"obj-cache-limit", "")
7178
statsdHostPort := common.CmdEnvString("Statsd udp address (host:port), omitted by default.\nIf omitted, stats won't be written.", "",
7279
"statsd", "")
80+
maxParallelCxx := common.CmdEnvInt("Max amount of C++ compiler processes launched in parallel, other ready sessions are waiting in a queue.\nBy default, it's a number of CPUs on the current machine.", int64(runtime.NumCPU()),
81+
"max-parallel-cxx", "")
7382

7483
common.ParseCmdFlagsCombiningWithEnv()
7584

@@ -78,10 +87,6 @@ func main() {
7887
os.Exit(0)
7988
}
8089

81-
if err = cleanupWorkingDir(*workingDir); err != nil {
82-
failedStart("Can't create working directory "+*workingDir, err)
83-
}
84-
8590
if err = server.MakeLoggerServer(*logFileName, *logVerbosity); err != nil {
8691
failedStart("Can't init logger", err)
8792
}
@@ -95,12 +100,12 @@ func main() {
95100
failedStart("Failed to connect to statsd", err)
96101
}
97102

98-
s.ActiveClients, err = server.MakeClientsStorage(path.Join(*workingDir, "clients"))
103+
s.ActiveClients, err = server.MakeClientsStorage(prepareEmptyDir(cppStoreDir, "clients"))
99104
if err != nil {
100105
failedStart("Failed to init clients hashtable", err)
101106
}
102107

103-
s.CxxLauncher, err = server.MakeCxxLauncher()
108+
s.CxxLauncher, err = server.MakeCxxLauncher(*maxParallelCxx)
104109
if err != nil {
105110
failedStart("Failed to init cxx launcher", err)
106111
}
@@ -110,17 +115,17 @@ func main() {
110115
failedStart("Failed to init system headers hashtable", err)
111116
}
112117

113-
s.SrcFileCache, err = server.MakeSrcFileCache(path.Join(*workingDir, "src-cache"), *srcCacheLimit)
118+
s.SrcFileCache, err = server.MakeSrcFileCache(prepareEmptyDir(cppStoreDir, "src-cache"), *srcCacheLimit)
114119
if err != nil {
115120
failedStart("Failed to init src file cache", err)
116121
}
117122

118-
s.ObjFileCache, err = server.MakeObjFileCache(path.Join(*workingDir, "obj-cache"), *objCacheLimit)
123+
s.ObjFileCache, err = server.MakeObjFileCache(prepareEmptyDir(objStoreDir, "obj-cache"), prepareEmptyDir(objStoreDir, "cxx-out"), *objCacheLimit)
119124
if err != nil {
120125
failedStart("Failed to init obj file cache", err)
121126
}
122127

123-
s.PchCompilation, err = server.MakePchCompilation(path.Join(*workingDir, "pch"))
128+
s.PchCompilation, err = server.MakePchCompilation(prepareEmptyDir(cppStoreDir, "pch"))
124129
if err != nil {
125130
failedStart("Failed to init pch compilation", err)
126131
}

cmd/nocc.cpp

+1-15
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ char *format_time_to_log() {
4444
static char time_buf[64];
4545
time_t ts = time(nullptr);
4646
tm *now = localtime(&ts);
47-
sprintf(time_buf, "%d/%02d/%02d %02d:%02d:%02d", 1900 + now->tm_year, 1 + now->tm_mon, now->tm_mday, now->tm_hour, now->tm_min, now->tm_sec);
47+
sprintf(time_buf, "%d-%02d-%02d %02d:%02d:%02d", 1900 + now->tm_year, 1 + now->tm_mon, now->tm_mday, now->tm_hour, now->tm_min, now->tm_sec);
4848
return time_buf;
4949
}
5050

@@ -97,13 +97,6 @@ void __attribute__((noreturn)) execute_cxx_locally(const char *errToPrint, int e
9797
exit(1);
9898
}
9999

100-
void __attribute__((noreturn)) execute_distcc_locally() {
101-
ARGV[0] = strdup("distcc");
102-
execvp("distcc", ARGV + 0);
103-
printf("could not run `distcc`, exit(1)\n");
104-
exit(1);
105-
}
106-
107100
void __attribute__((noreturn)) execute_go_nocc_instead_of_cpp() {
108101
execv(NOCC_GO_EXECUTABLE, ARGV);
109102
printf("could not run %s, exit(1)\n", NOCC_GO_EXECUTABLE);
@@ -279,13 +272,6 @@ int main(int argc, char *argv[]) {
279272
exit(1);
280273
}
281274

282-
// this possible fallback will be available for some time just in case
283-
char *env_fallback_to_distcc = getenv("NOCC_FALLBACK_TO_DISTCC");
284-
bool fallback_to_distcc = env_fallback_to_distcc != nullptr && env_fallback_to_distcc[0] == '1';
285-
if (fallback_to_distcc) {
286-
execute_distcc_locally();
287-
}
288-
289275
if (ARGC == 2 && !strcmp(ARGV[1], "start")) {
290276
int sockfd = connect_to_go_daemon_or_start_a_new_one();
291277
exit(sockfd == -1 ? 1 : 0);

docs/configuration.md

+32-10
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,18 @@ When you launch lots of jobs like `make -j 600`, then `nocc-daemon` has to maint
3434
All configuration on a server-side is done using command-line arguments.
3535
For a server, they are more reliable than environment variables.
3636

37-
| Cmd argument | Description |
38-
|--------------------------|-----------------------------------------------------------------------------------------|
39-
| `-host {string}` | Binding address, default 0.0.0.0. |
40-
| `-port {int}` | Listening port, default 43210. |
41-
| `-working-dir {string}` | Directory for saving incoming files, default */tmp/nocc-server*. |
42-
| `-log-filename {string}` | A filename to log, by default use stderr. |
43-
| `-log-verbosity {int}` | Logger verbosity level for INFO (-1 off, default 0, max 2). Errors are logged always. |
44-
| `-src-cache-limit {int}` | Header and source cache limit, in bytes, default 4G. |
45-
| `-obj-cache-limit {int}` | Compiled obj cache limit, in bytes, default 16G. |
46-
| `-statsd {string}` | Statsd udp address (host:port), omitted by default. If omitted, stats won't be written. |
37+
| Cmd argument | Description |
38+
|---------------------------|-----------------------------------------------------------------------------------------|
39+
| `-host {string}` | Binding address, default 0.0.0.0. |
40+
| `-port {int}` | Listening port, default 43210. |
41+
| `-cpp-dir {string}` | Directory for incoming C++ files and src cache, default */tmp/nocc/cpp*. |
42+
| `-obj-dir {string}` | Directory for resulting obj files and obj cache, default */tmp/nocc/obj*. |
43+
| `-log-filename {string}` | A filename to log, by default use stderr. |
44+
| `-log-verbosity {int}` | Logger verbosity level for INFO (-1 off, default 0, max 2). Errors are logged always. |
45+
| `-src-cache-limit {int}` | Header and source cache limit, in bytes, default 4G. |
46+
| `-obj-cache-limit {int}` | Compiled obj cache limit, in bytes, default 16G. |
47+
| `-statsd {string}` | Statsd udp address (host:port), omitted by default. If omitted, stats won't be written. |
48+
| `-max-parallel-cxx {int}` | Max amount of C++ compiler processes launched in parallel, default *nCPU*. |
4749

4850
All file caches are lost on restart, as references to files are kept in memory.
4951
There is also an LRU expiration mechanism to fit cache limits.
@@ -75,6 +77,26 @@ A list of all written stats could be obtained [inside statsd.go](../internal/ser
7577
They are quite intuitive, that's why we don't duplicate them here.
7678

7779

80+
<p><br></p>
81+
82+
## Configuring nocc + tmpfs
83+
84+
The directory passed as `-cpp-dir` can be placed in **tmpfs**.
85+
All operations with cpp files are performed in that directory:
86+
* incoming files (h/cpp/etc.) are saved there mirroring client's file structure;
87+
* src-cache is placed there;
88+
* pch files are placed there;
89+
* temporary files used to prevent race conditions are also placed there, not in the system tmp dir.
90+
91+
So, if that directory is placed in tmpfs, the C++ compiler will take all files from memory (except for system headers),
92+
which noticeably speeds up compilation.
93+
94+
When configuring the tmpfs size limit on your system, make sure it can hold `-src-cache-limit` plus some extra space.
95+
96+
Note that placing `-obj-dir` in tmpfs is not recommended, because obj files are usually much larger,
97+
and they are just transparently streamed back from a hard disk in chunks.
98+
99+
78100
<p><br></p>
79101

80102
## Other commands from a client

internal/client/compile-remotely.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func CompileCppRemotely(daemon *Daemon, cwd string, invocation *Invocation, remo
5252
return 0, nil, nil, err
5353
}
5454

55-
logClient.Info(1, "remote", remote.remoteHostPort, "sessionID", invocation.sessionID, "waiting", len(fileIndexesToUpload), "uploads", invocation.cppInFile)
55+
logClient.Info(1, "remote", remote.remoteHost, "sessionID", invocation.sessionID, "waiting", len(fileIndexesToUpload), "uploads", invocation.cppInFile)
5656
logClient.Info(2, "checked", len(requiredFiles), "files whether upload is needed or they exist on remote")
5757
invocation.summary.AddTiming("remote_session")
5858

@@ -75,7 +75,7 @@ func CompileCppRemotely(daemon *Daemon, cwd string, invocation *Invocation, remo
7575

7676
// Now, we have a resulting .o file placed in a path determined by -o from command line.
7777
if exitCode != 0 {
78-
logClient.Info(0, "remote C++ compiler exited with code", exitCode, "sessionID", invocation.sessionID, invocation.cppInFile, remote.remoteHostPort)
78+
logClient.Info(0, "remote C++ compiler exited with code", exitCode, "sessionID", invocation.sessionID, invocation.cppInFile, remote.remoteHost)
7979
logClient.Info(1, "cxxExitCode:", exitCode, "sessionID", invocation.sessionID, "\ncxxStdout:", strings.TrimSpace(string(invocation.cxxStdout)), "\ncxxStderr:", strings.TrimSpace(string(invocation.cxxStderr)))
8080
} else {
8181
logClient.Info(2, "saved obj file to", invocation.objOutFile)

internal/client/daemon.go

+40-15
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import (
1919
)
2020

2121
const (
22-
timeoutForceInterruptInvocation = 5 * time.Minute
22+
timeoutForceInterruptInvocation = 8 * time.Minute
2323
)
2424

2525
// Daemon is created once, in a separate process `nocc-daemon`, which is listening for connections via unix socket.
@@ -37,6 +37,7 @@ type Daemon struct {
3737

3838
listener *DaemonUnixSockListener
3939
remoteConnections []*RemoteConnection
40+
allRemotesDelim string
4041
localCxxThrottle chan struct{}
4142

4243
disableObjCache bool
@@ -77,31 +78,55 @@ func detectHostUserName() string {
7778
return curUser.Username
7879
}
7980

80-
func MakeDaemon(remoteNoccHosts []string, disableObjCache bool, disableOwnIncludes bool, localCxxQueueSize int64) (*Daemon, error) {
81+
func MakeDaemon(remoteNoccHosts []string, disableObjCache bool, disableOwnIncludes bool, maxLocalCxxProcesses int64) (*Daemon, error) {
82+
// send env NOCC_SERVERS on connect everywhere
83+
// this is for debugging purpose: in production, all clients should have the same servers list
84+
// to ensure this, just grep server logs: only one unique string should appear
85+
allRemotesDelim := ""
86+
for _, remoteHostPort := range remoteNoccHosts {
87+
if allRemotesDelim != "" {
88+
allRemotesDelim += ","
89+
}
90+
allRemotesDelim += ExtractRemoteHostWithoutPort(remoteHostPort)
91+
}
92+
8193
// env NOCC_SERVERS and others are supposed to be the same between `nocc` invocations
8294
// (in practice, this is true, as the first `nocc` invocation has no precedence over any other in a bunch)
8395
daemon := &Daemon{
8496
startTime: time.Now(),
8597
quitChan: make(chan int),
8698
clientID: detectClientID(),
8799
hostUserName: detectHostUserName(),
88-
remoteConnections: make([]*RemoteConnection, 0, len(remoteNoccHosts)),
89-
localCxxThrottle: make(chan struct{}, localCxxQueueSize),
100+
remoteConnections: make([]*RemoteConnection, len(remoteNoccHosts)),
101+
allRemotesDelim: allRemotesDelim,
102+
localCxxThrottle: make(chan struct{}, maxLocalCxxProcesses),
90103
disableOwnIncludes: disableOwnIncludes,
91104
disableObjCache: disableObjCache,
92-
disableLocalCxx: localCxxQueueSize == 0,
105+
disableLocalCxx: maxLocalCxxProcesses == 0,
93106
activeInvocations: make(map[uint32]*Invocation, 300),
94107
includesCache: make(map[string]*IncludesCache, 1),
95108
}
96109

97-
for _, remoteHostPort := range remoteNoccHosts {
98-
remote, err := MakeRemoteConnection(daemon, remoteHostPort, 1, 1)
99-
if err != nil {
100-
remote.isUnavailable = true
101-
logClient.Error("error connecting to", remoteHostPort, err)
102-
}
103-
daemon.remoteConnections = append(daemon.remoteConnections, remote)
110+
// connect to all remotes in parallel
111+
wg := sync.WaitGroup{}
112+
wg.Add(len(remoteNoccHosts))
113+
114+
ctxConnect, cancelFunc := context.WithTimeout(context.Background(), 5000*time.Millisecond)
115+
defer cancelFunc()
116+
117+
for index, remoteHostPort := range remoteNoccHosts {
118+
go func(index int, remoteHostPort string) {
119+
remote, err := MakeRemoteConnection(daemon, remoteHostPort, ctxConnect)
120+
if err != nil {
121+
remote.isUnavailable = true
122+
logClient.Error("error connecting to", remoteHostPort, err)
123+
}
124+
125+
daemon.remoteConnections[index] = remote
126+
wg.Done()
127+
}(index, remoteHostPort)
104128
}
129+
wg.Wait()
105130

106131
return daemon, nil
107132
}
@@ -202,10 +227,10 @@ func (daemon *Daemon) HandleInvocation(req DaemonSockRequest) DaemonSockResponse
202227
}
203228

204229
remote := daemon.chooseRemoteConnectionForCppCompilation(invocation.cppInFile)
205-
invocation.summary.remoteHostPort = remote.remoteHostPort
230+
invocation.summary.remoteHost = remote.remoteHost
206231

207232
if remote.isUnavailable {
208-
return daemon.FallbackToLocalCxx(req, fmt.Errorf("remote %s is unavailable", remote.remoteHostPort))
233+
return daemon.FallbackToLocalCxx(req, fmt.Errorf("remote %s is unavailable", remote.remoteHost))
209234
}
210235

211236
daemon.mu.Lock()
@@ -292,7 +317,7 @@ func (daemon *Daemon) PeriodicallyInterruptHangedInvocations() {
292317
daemon.mu.Lock()
293318
for _, invocation := range daemon.activeInvocations {
294319
if time.Since(invocation.createTime) > timeoutForceInterruptInvocation {
295-
invocation.ForceInterrupt(fmt.Errorf("interrupt sessionID %d after %d sec timeout", invocation.sessionID, int(time.Since(invocation.createTime).Seconds())))
320+
invocation.ForceInterrupt(fmt.Errorf("interrupt sessionID %d (%s) after %d sec timeout", invocation.sessionID, invocation.summary.remoteHost, int(time.Since(invocation.createTime).Seconds())))
296321
}
297322
}
298323
daemon.mu.Unlock()

internal/client/files-receiving.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func receiveObjFileByChunks(stream pb.CompilationService_RecvCompiledObjStreamCl
153153
return errWrite, false
154154
}
155155

156-
fileTmp, errWrite := common.OpenTempFile(objOutFile, false)
156+
fileTmp, errWrite := common.OpenTempFile(objOutFile)
157157
if errWrite == nil {
158158
_, errWrite = fileTmp.Write(firstChunk.ChunkBody)
159159
}

internal/client/invocation-summary.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ type invocationTimingItem struct {
1717
// It's mostly for developing/debugging purposes: multiple nocc invocations are appended to a single log file,
1818
// from which we can compute statistics, average and percentiles, either in total or partitioned by hosts.
1919
type InvocationSummary struct {
20-
remoteHostPort string
20+
remoteHost string
2121

2222
nIncludes int
2323
nFilesSent int
@@ -44,7 +44,7 @@ func (s *InvocationSummary) ToLogString(invocation *Invocation) string {
4444

4545
b := strings.Builder{}
4646
fmt.Fprintf(&b, "cppInFile=%q, remote=%s, sessionID=%d, nIncludes=%d, nFilesSent=%d, nBytesSent=%d, nBytesReceived=%d, cxxDuration=%dms",
47-
invocation.cppInFile, s.remoteHostPort, invocation.sessionID, s.nIncludes, s.nFilesSent, s.nBytesSent, s.nBytesReceived, invocation.cxxDuration)
47+
invocation.cppInFile, s.remoteHost, invocation.sessionID, s.nIncludes, s.nFilesSent, s.nBytesSent, s.nBytesReceived, invocation.cxxDuration)
4848

4949
prevTime := invocation.createTime
5050
fmt.Fprintf(&b, ", started=0ms")

0 commit comments

Comments
 (0)