Skip to content

Commit f67af10

Browse files
authored
Merge pull request #1411 from onetechnical/onetechnical/relbeta2.1.3
Onetechnical/relbeta2.1.3
2 parents 2fc1913 + 271860a commit f67af10

14 files changed

+171
-11
lines changed

buildnumber.dat

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2
1+
3

catchup/service.go

+12
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type Ledger interface {
5656
EnsureBlock(block *bookkeeping.Block, c agreement.Certificate)
5757
LastRound() basics.Round
5858
Block(basics.Round) (bookkeeping.Block, error)
59+
IsWritingCatchpointFile() bool
5960
}
6061

6162
// Service represents the catchup service. Once started and until it is stopped, it ensures that the ledger is up to date with network.
@@ -385,6 +386,12 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
385386
// there was an error
386387
return
387388
}
389+
// if we're writing a catchpoint file, stop catching up to reduce the memory pressure. Once we finish writing the file we
390+
// could resume with the catchup.
391+
if s.ledger.IsWritingCatchpointFile() {
392+
s.log.Info("Catchup is stopping due to catchpoint file being written")
393+
return
394+
}
388395
completedRounds[round] = true
389396
// fetch rounds we can validate
390397
for completedRounds[nextRound-basics.Round(parallelRequests)] {
@@ -436,6 +443,11 @@ func (s *Service) periodicSync() {
436443
sleepDuration = s.deadlineTimeout
437444
continue
438445
}
446+
// check to see if we're currently writing a catchpoint file. If so, wait longer before attempting again.
447+
if s.ledger.IsWritingCatchpointFile() {
448+
// keep the existing sleep duration and try again later.
449+
continue
450+
}
439451
s.log.Info("It's been too long since our ledger advanced; resyncing")
440452
s.sync(nil)
441453
case cert := <-s.unmatchedPendingCertificates:

catchup/service_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,10 @@ func (m *mockedLedger) LookupDigest(basics.Round) (crypto.Digest, error) {
577577
return crypto.Digest{}, errors.New("not needed for mockedLedger")
578578
}
579579

580+
func (m *mockedLedger) IsWritingCatchpointFile() bool {
581+
return false
582+
}
583+
580584
func testingenv(t testing.TB, numBlocks int) (ledger, emptyLedger Ledger) {
581585
mLedger := new(mockedLedger)
582586
mEmptyLedger := new(mockedLedger)

cmd/goal/node.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ func makeStatusString(stat generatedV2.NodeStatusResponse) string {
422422
*stat.Catchpoint)
423423

424424
if stat.CatchpointTotalAccounts != nil && (*stat.CatchpointTotalAccounts > 0) && stat.CatchpointProcessedAccounts != nil {
425-
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupAccounts, *stat.CatchpointProcessedAccounts,
426-
*stat.CatchpointTotalAccounts)
425+
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupAccounts, *stat.CatchpointTotalAccounts,
426+
*stat.CatchpointProcessedAccounts)
427427
}
428428
if stat.CatchpointAcquiredBlocks != nil && stat.CatchpointTotalBlocks != nil && (*stat.CatchpointAcquiredBlocks+*stat.CatchpointTotalBlocks > 0) {
429429
statusString = statusString + "\n" + fmt.Sprintf(infoNodeCatchpointCatchupBlocks, *stat.CatchpointTotalBlocks,

config/config.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type Local struct {
6363
// Version tracks the current version of the defaults so we can migrate old -> new
6464
// This is specifically important whenever we decide to change the default value
6565
// for an existing parameter. This field tag must be updated any time we add a new version.
66-
Version uint32 `version[0]:"0" version[1]:"1" version[2]:"2" version[3]:"3" version[4]:"4" version[5]:"5" version[6]:"6" version[7]:"7" version[8]:"8" version[9]:"9" version[10]:"10"`
66+
Version uint32 `version[0]:"0" version[1]:"1" version[2]:"2" version[3]:"3" version[4]:"4" version[5]:"5" version[6]:"6" version[7]:"7" version[8]:"8" version[9]:"9" version[10]:"10" version[11]:"11"`
6767

6868
// environmental (may be overridden)
6969
// When enabled, stores blocks indefinitally, otherwise, only the most recents blocks
@@ -328,6 +328,13 @@ type Local struct {
328328
// OptimizeAccountsDatabaseOnStartup controls whether the accounts database would be optimized
329329
// on algod startup.
330330
OptimizeAccountsDatabaseOnStartup bool `version[10]:"false"`
331+
332+
// CatchpointTracking determines if catchpoints are going to be tracked. The value is interpreted as follows:
333+
// A of -1 means "don't track catchpoints".
334+
// A value of 1 means "track catchpoints as long as CatchpointInterval is also set to a positive non-zero value". If CatchpointInterval <= 0, no catchpoint tracking would be performed.
335+
// A value of 0 means automatic, which is the default value. In this mode, a non archival node would not track the catchpoints, and an archival node would track the catchpoints as long as CatchpointInterval > 0.
336+
// Other values of CatchpointTracking would give a warning in the log file, and would behave as if the default value was provided.
337+
CatchpointTracking int64 `version[11]:"0"`
331338
}
332339

333340
// Filenames of config files within the configdir (e.g. ~/.algorand)

config/local_defaults.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@
2020
package config
2121

2222
var defaultLocal = Local{
23-
Version: 10,
23+
Version: 11,
2424
AnnounceParticipationKey: true,
2525
Archival: false,
2626
BaseLoggerDebugLevel: 4,
2727
BroadcastConnectionsLimit: -1,
2828
CadaverSizeTarget: 1073741824,
2929
CatchpointFileHistoryLength: 365,
3030
CatchpointInterval: 10000,
31+
CatchpointTracking: 0,
3132
CatchupBlockDownloadRetryAttempts: 1000,
3233
CatchupFailurePeerRefreshRate: 10,
3334
CatchupGossipBlockFetchTimeoutSec: 4,

installer/config.json.example

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
{
2-
"Version": 10,
2+
"Version": 11,
33
"AnnounceParticipationKey": true,
44
"Archival": false,
55
"BaseLoggerDebugLevel": 4,
66
"BroadcastConnectionsLimit": -1,
77
"CadaverSizeTarget": 1073741824,
88
"CatchpointFileHistoryLength": 365,
99
"CatchpointInterval": 10000,
10+
"CatchpointTracking": 0,
1011
"CatchupBlockDownloadRetryAttempts": 1000,
1112
"CatchupFailurePeerRefreshRate": 10,
1213
"CatchupGossipBlockFetchTimeoutSec": 4,

ledger/acctupdates.go

+39-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,23 @@ func (au *accountUpdates) initialize(cfg config.Local, dbPathPrefix string, gene
218218
au.initAccounts = genesisAccounts
219219
au.dbDirectory = filepath.Dir(dbPathPrefix)
220220
au.archivalLedger = cfg.Archival
221-
au.catchpointInterval = cfg.CatchpointInterval
221+
switch cfg.CatchpointTracking {
222+
case -1:
223+
au.catchpointInterval = 0
224+
default:
225+
// give a warning, then fall thought
226+
logging.Base().Warnf("accountUpdates: the CatchpointTracking field in the config.json file contains an invalid value (%d). The default value of 0 would be used instead.", cfg.CatchpointTracking)
227+
fallthrough
228+
case 0:
229+
if au.archivalLedger {
230+
au.catchpointInterval = cfg.CatchpointInterval
231+
} else {
232+
au.catchpointInterval = 0
233+
}
234+
case 1:
235+
au.catchpointInterval = cfg.CatchpointInterval
236+
}
237+
222238
au.catchpointFileHistoryLength = cfg.CatchpointFileHistoryLength
223239
if cfg.CatchpointFileHistoryLength < -1 {
224240
au.catchpointFileHistoryLength = -1
@@ -275,6 +291,22 @@ func (au *accountUpdates) close() {
275291
<-au.commitSyncerClosed
276292
}
277293

294+
// IsWritingCatchpointFile returns true when a catchpoint file is being generated. The function is used by the catchup service
295+
// to avoid memory pressure until the catchpoint file writing is complete.
296+
func (au *accountUpdates) IsWritingCatchpointFile() bool {
297+
au.accountsMu.Lock()
298+
defer au.accountsMu.Unlock()
299+
// if we're still writing the previous balances, we can't move forward yet.
300+
select {
301+
case <-au.catchpointWriting:
302+
// the channel catchpointWriting is currently closed, meaning that we're currently not writing any
303+
// catchpoint file.
304+
return false
305+
default:
306+
return true
307+
}
308+
}
309+
278310
// Lookup returns the accound data for a given address at a given round. The withRewards indicates whether the
279311
// rewards should be added to the AccountData before returning. Note that the function doesn't update the account with the rewards,
280312
// even while it could return the AccoutData which represent the "rewarded" account data.
@@ -1579,6 +1611,12 @@ func (au *accountUpdates) generateCatchpoint(committedRound basics.Round, label
15791611
db.ResetTransactionWarnDeadline(ctx, tx, time.Now().Add(1*time.Second))
15801612
select {
15811613
case <-time.After(100 * time.Millisecond):
1614+
// increase the time slot allocated for writing the catchpoint, but stop when we get to the longChunkExecutionDuration limit.
1615+
// this would allow the catchpoint writing speed to ramp up while still leaving some cpu available.
1616+
chunkExecutionDuration *= 2
1617+
if chunkExecutionDuration > longChunkExecutionDuration {
1618+
chunkExecutionDuration = longChunkExecutionDuration
1619+
}
15821620
case <-au.ctx.Done():
15831621
retryCatchpointCreation = true
15841622
err2 := catchpointWriter.Abort()

ledger/ledger.go

+8
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,14 @@ func (l *Ledger) trackerEvalVerified(blk bookkeeping.Block, accUpdatesLedger led
584584
return eval(context.Background(), accUpdatesLedger, blk, false, nil, nil)
585585
}
586586

587+
// IsWritingCatchpointFile returns true when a catchpoint file is being generated. The function is used by the catchup service
588+
// to avoid memory pressure until the catchpoint file writing is complete.
589+
func (l *Ledger) IsWritingCatchpointFile() bool {
590+
l.trackerMu.RLock()
591+
defer l.trackerMu.RUnlock()
592+
return l.accts.IsWritingCatchpointFile()
593+
}
594+
587595
// A txlease is a transaction (sender, lease) pair which uniquely specifies a
588596
// transaction lease.
589597
type txlease struct {
+9-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#!/usr/bin/env bash
22

3-
if [[ "$1" =~ ^rel/ ]]; then
4-
echo "disable"
5-
else
6-
echo "enable"
3+
# if the user ( i.e. algorand developer ) has explicitly enabled the deadlock detection in his environment, we want to enable it.
4+
if [ "$ALGORAND_DEADLOCK" != "" ]; then
5+
echo "$ALGORAND_DEADLOCK"
6+
exit 0
77
fi
8+
9+
# we used to disable the deadlock on all release builds, which cause issues with individuals who compiled it on their own.
10+
# as a result, we decided to disable it always, unless we're running on travis. If we'll ever want to make it dependent
11+
# on the build branch, the build branch is available in $1. ( i.e. if [[ "$1" =~ ^rel/ ]]; then ... )
12+
echo "disable"

scripts/travis/build_test.sh

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
# Examples: scripts/travis/build_test.sh
1010
set -e
1111

12+
ALGORAND_DEADLOCK=enable
13+
export ALGORAND_DEADLOCK
1214
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
1315

1416
if [ "${USER}" = "travis" ]; then

scripts/travis/integration_test.sh

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
# Examples: scripts/travis/integration_test.sh
1010
set -e
1111

12+
ALGORAND_DEADLOCK=enable
13+
export ALGORAND_DEADLOCK
14+
1215
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
1316

1417
export BUILD_TYPE="integration"

scripts/travis/test_release.sh

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
set -e
1010

11+
ALGORAND_DEADLOCK=enable
12+
export ALGORAND_DEADLOCK
1113
BRANCH=$(./scripts/compute_branch.sh)
1214
CHANNEL=$(./scripts/compute_branch_channel.sh "$BRANCH")
1315

test/testdata/configs/config-v11.json

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
{
2+
"Version": 11,
3+
"AnnounceParticipationKey": true,
4+
"Archival": false,
5+
"BaseLoggerDebugLevel": 4,
6+
"BroadcastConnectionsLimit": -1,
7+
"CadaverSizeTarget": 1073741824,
8+
"CatchpointFileHistoryLength": 365,
9+
"CatchpointInterval": 10000,
10+
"CatchpointTracking": 0,
11+
"CatchupBlockDownloadRetryAttempts": 1000,
12+
"CatchupFailurePeerRefreshRate": 10,
13+
"CatchupGossipBlockFetchTimeoutSec": 4,
14+
"CatchupHTTPBlockFetchTimeoutSec": 4,
15+
"CatchupLedgerDownloadRetryAttempts": 50,
16+
"CatchupParallelBlocks": 16,
17+
"ConnectionsRateLimitingCount": 60,
18+
"ConnectionsRateLimitingWindowSeconds": 1,
19+
"DNSBootstrapID": "<network>.algorand.network",
20+
"DNSSecurityFlags": 1,
21+
"DeadlockDetection": 0,
22+
"DisableOutgoingConnectionThrottling": false,
23+
"EnableAgreementReporting": false,
24+
"EnableAgreementTimeMetrics": false,
25+
"EnableAssembleStats": false,
26+
"EnableBlockService": false,
27+
"EnableDeveloperAPI": false,
28+
"EnableGossipBlockService": true,
29+
"EnableIncomingMessageFilter": false,
30+
"EnableLedgerService": false,
31+
"EnableMetricReporting": false,
32+
"EnableOutgoingNetworkMessageFiltering": true,
33+
"EnablePingHandler": true,
34+
"EnableProcessBlockStats": false,
35+
"EnableProfiler": false,
36+
"EnableRequestLogger": false,
37+
"EnableTopAccountsReporting": false,
38+
"EndpointAddress": "127.0.0.1:0",
39+
"FallbackDNSResolverAddress": "",
40+
"ForceRelayMessages": false,
41+
"GossipFanout": 4,
42+
"IncomingConnectionsLimit": 10000,
43+
"IncomingMessageFilterBucketCount": 5,
44+
"IncomingMessageFilterBucketSize": 512,
45+
"IsIndexerActive": false,
46+
"LogArchiveMaxAge": "",
47+
"LogArchiveName": "node.archive.log",
48+
"LogSizeLimit": 1073741824,
49+
"MaxConnectionsPerIP": 30,
50+
"NetAddress": "",
51+
"NetworkProtocolVersion": "",
52+
"NodeExporterListenAddress": ":9100",
53+
"NodeExporterPath": "./node_exporter",
54+
"OptimizeAccountsDatabaseOnStartup": false,
55+
"OutgoingMessageFilterBucketCount": 3,
56+
"OutgoingMessageFilterBucketSize": 128,
57+
"PeerConnectionsUpdateInterval": 3600,
58+
"PeerPingPeriodSeconds": 0,
59+
"PriorityPeers": {},
60+
"PublicAddress": "",
61+
"ReconnectTime": 60000000000,
62+
"ReservedFDs": 256,
63+
"RestReadTimeoutSeconds": 15,
64+
"RestWriteTimeoutSeconds": 120,
65+
"RunHosted": false,
66+
"SuggestedFeeBlockHistory": 3,
67+
"SuggestedFeeSlidingWindowSize": 50,
68+
"TLSCertFile": "",
69+
"TLSKeyFile": "",
70+
"TelemetryToLog": true,
71+
"TxPoolExponentialIncreaseFactor": 2,
72+
"TxPoolSize": 15000,
73+
"TxSyncIntervalSeconds": 60,
74+
"TxSyncServeResponseSize": 1000000,
75+
"TxSyncTimeoutSeconds": 30,
76+
"UseXForwardedForAddressField": ""
77+
}

0 commit comments

Comments
 (0)