Skip to content

Commit f67af10

Browse files
authored
Merge pull request #1411 from onetechnical/onetechnical/relbeta2.1.3
Onetechnical/relbeta2.1.3
2 parents 2fc1913 + 271860a commit f67af10

File tree

14 files changed

+171
-11
lines changed

14 files changed

+171
-11
lines changed

buildnumber.dat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2
1+
3

catchup/service.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type Ledger interface {
5656
EnsureBlock(block *bookkeeping.Block, c agreement.Certificate)
5757
LastRound() basics.Round
5858
Block(basics.Round) (bookkeeping.Block, error)
59+
IsWritingCatchpointFile() bool
5960
}
6061

6162
// Service represents the catchup service. Once started and until it is stopped, it ensures that the ledger is up to date with network.
@@ -385,6 +386,12 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
385386
// there was an error
386387
return
387388
}
389+
// if we're writing a catchpoint file, stop catching up to reduce the memory pressure. Once we finish writing the file we
390+
// could resume with the catchup.
391+
if s.ledger.IsWritingCatchpointFile() {
392+
s.log.Info("Catchup is stopping due to catchpoint file being written")
393+
return
394+
}
388395
completedRounds[round] = true
389396
// fetch rounds we can validate
390397
for completedRounds[nextRound-basics.Round(parallelRequests)] {
@@ -436,6 +443,11 @@ func (s *Service) periodicSync() {
436443
sleepDuration = s.deadlineTimeout
437444
continue
438445
}
446+
// check to see if we're currently writing a catchpoint file. If so, wait longer before attempting again.
447+
if s.ledger.IsWritingCatchpointFile() {
448+
// keep the existing sleep duration and try again later.
449+
continue
450+
}
439451
s.log.Info("It's been too long since our ledger advanced; resyncing")
440452
s.sync(nil)
441453
case cert := <-s.unmatchedPendingCertificates:

catchup/service_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,10 @@ func (m *mockedLedger) LookupDigest(basics.Round) (crypto.Digest, error) {
577577
return crypto.Digest{}, errors.New("not needed for mockedLedger")
578578
}
579579

580+
func (m *mockedLedger) IsWritingCatchpointFile() bool {
581+
return false
582+
}
583+
580584
func testingenv(t testing.TB, numBlocks int) (ledger, emptyLedger Ledger) {
581585
mLedger := new(mockedLedger)
582586
mEmptyLedger := new(mockedLedger)

cmd/goal/node.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ func makeStatusString(stat generatedV2.NodeStatusResponse) string {
422422
*stat.Catchpoint)
423423

424424
if stat.CatchpointTotalAccounts != nil && (*stat.CatchpointTotalAccounts > 0) && stat.CatchpointProcessedAccounts != nil {
425-
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupAccounts, *stat.CatchpointProcessedAccounts,
426-
*stat.CatchpointTotalAccounts)
425+
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupAccounts, *stat.CatchpointTotalAccounts,
426+
*stat.CatchpointProcessedAccounts)
427427
}
428428
if stat.CatchpointAcquiredBlocks != nil && stat.CatchpointTotalBlocks != nil && (*stat.CatchpointAcquiredBlocks+*stat.CatchpointTotalBlocks > 0) {
429429
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupBlocks, *stat.CatchpointTotalBlocks,

config/config.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type Local struct {
6363
// Version tracks the current version of the defaults so we can migrate old -> new
6464
// This is specifically important whenever we decide to change the default value
6565
// for an existing parameter. This field tag must be updated any time we add a new version.
66-
Version uint32 `version[0]:"0" version[1]:"1" version[2]:"2" version[3]:"3" version[4]:"4" version[5]:"5" version[6]:"6" version[7]:"7" version[8]:"8" version[9]:"9" version[10]:"10"`
66+
Version uint32 `version[0]:"0" version[1]:"1" version[2]:"2" version[3]:"3" version[4]:"4" version[5]:"5" version[6]:"6" version[7]:"7" version[8]:"8" version[9]:"9" version[10]:"10" version[11]:"11"`
6767

6868
// environmental (may be overridden)
6969
// When enabled, stores blocks indefinitally, otherwise, only the most recents blocks
@@ -328,6 +328,13 @@ type Local struct {
328328
// OptimizeAccountsDatabaseOnStartup controls whether the accounts database would be optimized
329329
// on algod startup.
330330
OptimizeAccountsDatabaseOnStartup bool `version[10]:"false"`
331+
332+
// CatchpointTracking determines if catchpoints are going to be tracked. The value is interpreted as follows:
333+
// A of -1 means "don't track catchpoints".
334+
// A value of 1 means "track catchpoints as long as CatchpointInterval is also set to a positive non-zero value". If CatchpointInterval <= 0, no catchpoint tracking would be performed.
335+
// A value of 0 means automatic, which is the default value. In this mode, a non archival node would not track the catchpoints, and an archival node would track the catchpoints as long as CatchpointInterval > 0.
336+
// Other values of CatchpointTracking would give a warning in the log file, and would behave as if the default value was provided.
337+
CatchpointTracking int64 `version[11]:"0"`
331338
}
332339

333340
// Filenames of config files within the configdir (e.g. ~/.algorand)

config/local_defaults.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@
2020
package config
2121

2222
var defaultLocal = Local{
23-
Version: 10,
23+
Version: 11,
2424
AnnounceParticipationKey: true,
2525
Archival: false,
2626
BaseLoggerDebugLevel: 4,
2727
BroadcastConnectionsLimit: -1,
2828
CadaverSizeTarget: 1073741824,
2929
CatchpointFileHistoryLength: 365,
3030
CatchpointInterval: 10000,
31+
CatchpointTracking: 0,
3132
CatchupBlockDownloadRetryAttempts: 1000,
3233
CatchupFailurePeerRefreshRate: 10,
3334
CatchupGossipBlockFetchTimeoutSec: 4,

installer/config.json.example

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
{
2-
"Version": 10,
2+
"Version": 11,
33
"AnnounceParticipationKey": true,
44
"Archival": false,
55
"BaseLoggerDebugLevel": 4,
66
"BroadcastConnectionsLimit": -1,
77
"CadaverSizeTarget": 1073741824,
88
"CatchpointFileHistoryLength": 365,
99
"CatchpointInterval": 10000,
10+
"CatchpointTracking": 0,
1011
"CatchupBlockDownloadRetryAttempts": 1000,
1112
"CatchupFailurePeerRefreshRate": 10,
1213
"CatchupGossipBlockFetchTimeoutSec": 4,

ledger/acctupdates.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,23 @@ func (au *accountUpdates) initialize(cfg config.Local, dbPathPrefix string, gene
218218
au.initAccounts = genesisAccounts
219219
au.dbDirectory = filepath.Dir(dbPathPrefix)
220220
au.archivalLedger = cfg.Archival
221-
au.catchpointInterval = cfg.CatchpointInterval
221+
switch cfg.CatchpointTracking {
222+
case -1:
223+
au.catchpointInterval = 0
224+
default:
225+
// give a warning, then fall thought
226+
logging.Base().Warnf("accountUpdates: the CatchpointTracking field in the config.json file contains an invalid value (%d). The default value of 0 would be used instead.", cfg.CatchpointTracking)
227+
fallthrough
228+
case 0:
229+
if au.archivalLedger {
230+
au.catchpointInterval = cfg.CatchpointInterval
231+
} else {
232+
au.catchpointInterval = 0
233+
}
234+
case 1:
235+
au.catchpointInterval = cfg.CatchpointInterval
236+
}
237+
222238
au.catchpointFileHistoryLength = cfg.CatchpointFileHistoryLength
223239
if cfg.CatchpointFileHistoryLength < -1 {
224240
au.catchpointFileHistoryLength = -1
@@ -275,6 +291,22 @@ func (au *accountUpdates) close() {
275291
<-au.commitSyncerClosed
276292
}
277293

294+
// IsWritingCatchpointFile returns true when a catchpoint file is being generated. The function is used by the catchup service
295+
// to avoid memory pressure until the catchpoint file writing is complete.
296+
func (au *accountUpdates) IsWritingCatchpointFile() bool {
297+
au.accountsMu.Lock()
298+
defer au.accountsMu.Unlock()
299+
// if we're still writing the previous balances, we can't move forward yet.
300+
select {
301+
case <-au.catchpointWriting:
302+
// the channel catchpointWriting is currently closed, meaning that we're currently not writing any
303+
// catchpoint file.
304+
return false
305+
default:
306+
return true
307+
}
308+
}
309+
278310
// Lookup returns the accound data for a given address at a given round. The withRewards indicates whether the
279311
// rewards should be added to the AccountData before returning. Note that the function doesn't update the account with the rewards,
280312
// even while it could return the AccoutData which represent the "rewarded" account data.
@@ -1579,6 +1611,12 @@ func (au *accountUpdates) generateCatchpoint(committedRound basics.Round, label
15791611
db.ResetTransactionWarnDeadline(ctx, tx, time.Now().Add(1*time.Second))
15801612
select {
15811613
case <-time.After(100 * time.Millisecond):
1614+
// increase the time slot allocated for writing the catchpoint, but stop when we get to the longChunkExecutionDuration limit.
1615+
// this would allow the catchpoint writing speed to ramp up while still leaving some cpu available.
1616+
chunkExecutionDuration *= 2
1617+
if chunkExecutionDuration > longChunkExecutionDuration {
1618+
chunkExecutionDuration = longChunkExecutionDuration
1619+
}
15821620
case <-au.ctx.Done():
15831621
retryCatchpointCreation = true
15841622
err2 := catchpointWriter.Abort()

ledger/ledger.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,14 @@ func (l *Ledger) trackerEvalVerified(blk bookkeeping.Block, accUpdatesLedger led
584584
return eval(context.Background(), accUpdatesLedger, blk, false, nil, nil)
585585
}
586586

587+
// IsWritingCatchpointFile returns true when a catchpoint file is being generated. The function is used by the catchup service
588+
// to avoid memory pressure until the catchpoint file writing is complete.
589+
func (l *Ledger) IsWritingCatchpointFile() bool {
590+
l.trackerMu.RLock()
591+
defer l.trackerMu.RUnlock()
592+
return l.accts.IsWritingCatchpointFile()
593+
}
594+
587595
// A txlease is a transaction (sender, lease) pair which uniquely specifies a
588596
// transaction lease.
589597
type txlease struct {
Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#!/usr/bin/env bash
22

3-
if [[ "$1" =~ ^rel/ ]]; then
4-
echo "disable"
5-
else
6-
echo "enable"
3+
# if the user ( i.e. algorand developer ) has explicitly enabled the deadlock detection in his environment, we want to enable it.
4+
if [ "$ALGORAND_DEADLOCK" != "" ]; then
5+
echo "$ALGORAND_DEADLOCK"
6+
exit 0
77
fi
8+
9+
# we used to disable the deadlock on all release builds, which cause issues with individuals who compiled it on their own.
10+
# as a result, we decided to disable it always, unless we're running on travis. If we'll ever want to make it dependent
11+
# on the build branch, the build branch is available in $1. ( i.e. if [[ "$1" =~ ^rel/ ]]; then ... )
12+
echo "disable"

0 commit comments

Comments
 (0)