Skip to content

Commit 3e3e117

Browse files
WIP: first pass at reconfiguration support (#118)
* WIP: first pass at reconfiguration support
* fix audit event name; improve instance reports for homer-ts
* handle reconfiguration rest call
* tighter date formatting
* moved reconfiguration to separate class; changed reconfigure to return date string instead of boolean; changed function names to reflect date output of configuration
* cleanup init of objects
* enable reconfiguration flag on groups
* rename status to reconfigureDate, no longer receive as input
* remove lock, add try/catch for error case
* pipeline for audit instance update TTL commands; remove pipeline for individual set command
* audit reconfiguration requests, report latest
1 parent fcb9e2d commit 3e3e117

9 files changed

Lines changed: 352 additions & 91 deletions

src/app.ts

Lines changed: 59 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import InstanceLauncher from './instance_launcher';
1717
import LockManager from './lock_manager';
1818
import * as stats from './stats';
1919
import ShutdownManager from './shutdown_manager';
20+
import ReconfigureManager from './reconfigure_manager';
2021
import JobManager from './job_manager';
2122
import GroupReportGenerator from './group_report';
2223
import Audit from './audit';
@@ -69,7 +70,7 @@ if (config.RedisDb) {
6970
const redisClient = new Redis(redisOptions);
7071

7172
const audit = new Audit({
72-
redisClient: redisClient,
73+
redisClient,
7374
redisScanCount: config.RedisScanCount,
7475
auditTTL: config.AuditTTL,
7576
groupRelatedDataTTL: config.GroupRelatedDataTTL,
@@ -78,14 +79,20 @@ const audit = new Audit({
7879
const shutdownManager = new ShutdownManager({
7980
redisClient,
8081
shutdownTTL: config.ShutDownTTL,
81-
audit: audit,
82+
audit,
83+
});
84+
85+
const reconfigureManager = new ReconfigureManager({
86+
redisClient,
87+
reconfigureTTL: config.ReconfigureTTL,
88+
audit,
8289
});
8390

8491
const instanceTracker = new InstanceTracker({
8592
redisClient,
8693
redisScanCount: config.RedisScanCount,
87-
shutdownManager: shutdownManager,
88-
audit: audit,
94+
shutdownManager,
95+
audit,
8996
idleTTL: config.IdleTTL,
9097
metricTTL: config.MetricTTL,
9198
provisioningTTL: config.ProvisioningTTL,
@@ -94,29 +101,27 @@ const instanceTracker = new InstanceTracker({
94101
});
95102

96103
const cloudManager = new CloudManager({
97-
shutdownManager: shutdownManager,
104+
shutdownManager,
98105
isDryRun: config.DryRun,
99106
ociConfigurationFilePath: config.OciConfigurationFilePath,
100107
ociConfigurationProfile: config.OciConfigurationProfile,
101108
digitalOceanAPIToken: config.DigitalOceanAPIToken,
102109
digitalOceanConfigurationFilePath: config.DigitalOceanConfigurationFilePath,
103-
104-
instanceTracker: instanceTracker,
105-
audit: audit,
110+
instanceTracker,
111+
audit,
106112
cloudProviders: config.CloudProviders,
107-
108113
customConfigurationLaunchScriptPath: config.CustomConfigurationLaunchScriptPath,
109114
customConfigurationLaunchScriptTimeoutMs: config.CustomConfigurationLaunchScriptTimeoutMs,
110115
});
111116

112117
const lockManager: LockManager = new LockManager(logger, {
113-
redisClient: redisClient,
118+
redisClient,
114119
jobCreationLockTTL: config.JobsCreationLockTTLMs,
115120
groupLockTTLMs: config.GroupLockTTLMs,
116121
});
117122

118123
const instanceGroupManager = new InstanceGroupManager({
119-
redisClient: redisClient,
124+
redisClient,
120125
redisScanCount: config.RedisScanCount,
121126
initialGroupList: config.GroupList,
122127
groupJobsCreationGracePeriod: config.GroupJobsCreationGracePeriodSec,
@@ -135,12 +140,12 @@ instanceGroupManager.init(initCtx).catch((err) => {
135140
});
136141

137142
const autoscaleProcessor = new AutoscaleProcessor({
138-
instanceTracker: instanceTracker,
139-
cloudManager: cloudManager,
140-
instanceGroupManager: instanceGroupManager,
141-
lockManager: lockManager,
143+
instanceTracker,
144+
cloudManager,
145+
instanceGroupManager,
146+
lockManager,
142147
redisClient,
143-
audit: audit,
148+
audit,
144149
});
145150

146151
const metricsLoop = new MetricsLoop({
@@ -153,45 +158,46 @@ const metricsLoop = new MetricsLoop({
153158

154159
const instanceLauncher = new InstanceLauncher({
155160
maxThrottleThreshold: config.MaxThrottleThreshold,
156-
instanceTracker: instanceTracker,
157-
cloudManager: cloudManager,
158-
instanceGroupManager: instanceGroupManager,
159-
lockManager: lockManager,
161+
instanceTracker,
162+
cloudManager,
163+
instanceGroupManager,
164+
lockManager,
160165
redisClient,
161166
shutdownManager,
162-
audit: audit,
167+
audit,
163168
metricsLoop,
164169
});
165170

166171
const groupReportGenerator = new GroupReportGenerator({
167-
instanceTracker: instanceTracker,
168-
shutdownManager: shutdownManager,
169-
metricsLoop: metricsLoop,
172+
instanceTracker,
173+
shutdownManager,
174+
reconfigureManager,
175+
metricsLoop,
170176
});
171177

172178
const sanityLoop = new SanityLoop({
173-
redisClient: redisClient,
179+
redisClient,
174180
metricsTTL: config.ServiceLevelMetricsTTL,
175-
cloudManager: cloudManager,
181+
cloudManager,
176182
reportExtCallRetryStrategy: {
177183
maxTimeInSeconds: config.ReportExtCallMaxTimeInSeconds,
178184
maxDelayInSeconds: config.ReportExtCallMaxDelayInSeconds,
179185
retryableStatusCodes: config.ReportExtCallRetryableStatusCodes,
180186
},
181-
groupReportGenerator: groupReportGenerator,
182-
instanceGroupManager: instanceGroupManager,
187+
groupReportGenerator,
188+
instanceGroupManager,
183189
});
184190

185191
// Each Queue in JobManager has its own Redis connection (other than the one in RedisClient)
186192
// Bee-Queue also uses a different Redis library, so we map redisOptions to the object expected by Bee-Queue
187193
const jobManager = new JobManager({
188194
queueRedisOptions: redisQueueOptions,
189-
lockManager: lockManager,
190-
instanceGroupManager: instanceGroupManager,
191-
instanceLauncher: instanceLauncher,
195+
lockManager,
196+
instanceGroupManager,
197+
instanceLauncher,
192198
autoscaler: autoscaleProcessor,
193-
sanityLoop: sanityLoop,
194-
metricsLoop: metricsLoop,
199+
sanityLoop,
200+
metricsLoop,
195201
autoscalerProcessingTimeoutMs: config.GroupProcessingTimeoutMs,
196202
launcherProcessingTimeoutMs: config.GroupProcessingTimeoutMs,
197203
sanityLoopProcessingTimeoutMs: config.SanityProcessingTimoutMs,
@@ -252,13 +258,14 @@ async function pollForMetrics(metricsLoop: MetricsLoop) {
252258
}
253259

254260
const h = new Handlers({
255-
instanceTracker: instanceTracker,
256-
instanceGroupManager: instanceGroupManager,
257-
shutdownManager: shutdownManager,
258-
groupReportGenerator: groupReportGenerator,
259-
lockManager: lockManager,
260-
audit: audit,
261-
scalingManager: scalingManager,
261+
instanceTracker,
262+
instanceGroupManager,
263+
shutdownManager,
264+
reconfigureManager,
265+
groupReportGenerator,
266+
lockManager,
267+
audit,
268+
scalingManager,
262269
});
263270

264271
const validator = new Validator({ instanceTracker, instanceGroupManager });
@@ -551,6 +558,18 @@ app.put(
551558
},
552559
);
553560

561+
// POST /groups/:name/actions/reconfigure-instances
// Rejects the request with HTTP 400 and the collected validation errors when
// validationResult reports any; otherwise hands the request to the handler.
// Anything thrown along the way is forwarded to Express via next().
app.post('/groups/:name/actions/reconfigure-instances', async (req, res, next) => {
    try {
        const validation = validationResult(req);
        if (validation.isEmpty()) {
            await h.reconfigureInstanceGroup(req, res);
        } else {
            res.status(400).json({ errors: validation.array() });
        }
    } catch (err) {
        next(err);
    }
});
572+
554573
app.listen(config.HTTPServerPort, () => {
555574
logger.info(`...listening on :${config.HTTPServerPort}`);
556575
});

src/audit.ts

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ export interface LauncherActionItem {
3737
export interface GroupAuditResponse {
3838
lastLauncherRun: string;
3939
lastAutoScalerRun: string;
40+
lastReconfigureRequest: string;
4041
autoScalerActionItems?: AutoScalerActionItem[];
4142
launcherActionItems?: LauncherActionItem[];
4243
}
@@ -46,6 +47,8 @@ export interface InstanceAuditResponse {
4647
requestToLaunch: string;
4748
latestStatus: string;
4849
requestToTerminate: string;
50+
requestToReconfigure: string;
51+
reconfigureComplete: string;
4952
latestStatusInfo?: InstanceState;
5053
}
5154

@@ -82,12 +85,23 @@ export default class Audit {
8285
this.auditTTL,
8386
);
8487
if (latestStatusSaved) {
85-
this.increaseLaunchEventExpiration(groupName, instanceId);
86-
this.increaseShutdownEventExpiration(groupName, instanceId);
88+
this.increaseInstanceExpirations(groupName, instanceId);
8789
}
8890
return latestStatusSaved;
8991
}
9092

93+
/**
 * Resets the expiry of the per-instance audit event keys (request-to-launch,
 * request-to-terminate, request-to-reconfigure, reconfigure-complete) to
 * `this.auditTTL`.
 *
 * The four EXPIRE commands are queued on a single Redis pipeline and executed
 * together. The per-command results are not inspected, so a key that does not
 * exist is simply left alone.
 *
 * @param groupName group the instance belongs to (part of the key)
 * @param instanceId instance whose audit keys are refreshed (part of the key)
 * @returns always `true` once the pipeline has been executed
 */
async increaseInstanceExpirations(groupName: string, instanceId: string): Promise<boolean> {
    const keyPrefix = `audit:${groupName}:${instanceId}`;
    const eventSuffixes = [
        'request-to-launch',
        'request-to-terminate',
        'request-to-reconfigure',
        'reconfigure-complete',
    ];

    const batch = this.redisClient.pipeline();
    for (const suffix of eventSuffixes) {
        batch.expire(`${keyPrefix}:${suffix}`, this.auditTTL);
    }
    await batch.exec();

    return true;
}
91105
async saveLaunchEvent(groupName: string, instanceId: string): Promise<boolean> {
92106
const value: InstanceAudit = {
93107
instanceId: instanceId,
@@ -97,15 +111,6 @@ export default class Audit {
97111
return this.setInstanceValue(`audit:${groupName}:${instanceId}:request-to-launch`, value, this.auditTTL);
98112
}
99113

100-
private async increaseLaunchEventExpiration(groupName: string, instanceId: string): Promise<boolean> {
101-
// we don't care if this fails (e.g. perhaps the event no longer is there)
102-
const result = await this.redisClient.expire(
103-
`audit:${groupName}:${instanceId}:request-to-launch`,
104-
this.auditTTL,
105-
);
106-
return result == 1;
107-
}
108-
109114
async saveShutdownEvents(instanceDetails: Array<InstanceDetails>): Promise<void> {
110115
const pipeline = this.redisClient.pipeline();
111116
for (const instance of instanceDetails) {
@@ -124,13 +129,36 @@ export default class Audit {
124129
await pipeline.exec();
125130
}
126131

127-
private async increaseShutdownEventExpiration(groupName: string, instanceId: string): Promise<boolean> {
128-
// we don't care if this fails (e.g. perhaps the event no longer is there)
129-
const result = await this.redisClient.expire(
130-
`audit:${groupName}:${instanceId}:request-to-terminate`,
132+
async saveUnsetReconfigureEvents(instanceId: string, group: string): Promise<void> {
133+
const value: InstanceAudit = {
134+
instanceId: instanceId,
135+
type: 'reconfigure-complete',
136+
timestamp: Date.now(),
137+
};
138+
await this.redisClient.set(
139+
`audit:${group}:${instanceId}:reconfigure-complete`,
140+
JSON.stringify(value),
141+
'ex',
131142
this.auditTTL,
132143
);
133-
return result == 1;
144+
}
145+
146+
/**
 * Writes a `request-to-reconfigure` audit entry for each given instance.
 *
 * Every entry is serialized to JSON and stored under
 * `audit:<group>:<instanceId>:request-to-reconfigure` with an expiry of
 * `this.auditTTL`. The SET commands are queued on one Redis pipeline and
 * executed together; the timestamp is taken per instance at queueing time.
 *
 * @param instanceDetails instances for which a reconfigure was requested
 */
async saveReconfigureEvents(instanceDetails: Array<InstanceDetails>): Promise<void> {
    const batch = this.redisClient.pipeline();
    instanceDetails.forEach(({ group, instanceId }) => {
        const auditEntry: InstanceAudit = {
            instanceId,
            type: 'request-to-reconfigure',
            timestamp: Date.now(),
        };
        const key = `audit:${group}:${instanceId}:request-to-reconfigure`;
        batch.set(key, JSON.stringify(auditEntry), 'ex', this.auditTTL);
    });
    await batch.exec();
}
135163

136164
async setInstanceValue(key: string, value: InstanceAudit, ttl: number): Promise<boolean> {
@@ -159,6 +187,17 @@ export default class Audit {
159187
return true;
160188
}
161189

190+
/**
 * Records a `last-reconfigure-request` group audit entry for the given group
 * and logs that it was updated.
 *
 * @param ctx request context; only its logger is used here
 * @param groupName group for which a reconfiguration was requested
 * @returns whatever `setGroupValue` resolves to
 */
async updateLastReconfigureRequest(ctx: Context, groupName: string): Promise<boolean> {
    // NOTE(review): no timestamp is set here although the group audit report
    // reads `groupAudit.timestamp`; presumably setGroupValue stamps it — confirm.
    const value: GroupAudit = {
        groupName: groupName,
        type: 'last-reconfigure-request',
    };
    // Await the write so the success message is only logged after it has
    // completed; a rejection still propagates to the caller as before.
    const updateResponse = await this.setGroupValue(groupName, value);
    ctx.logger.info(`Updated last reconfiguration request for group ${groupName}`);

    return updateResponse;
}
200+
162201
async updateLastLauncherRun(ctx: Context, groupName: string): Promise<boolean> {
163202
const updateLastLaunchStart = process.hrtime();
164203

@@ -257,6 +296,8 @@ export default class Audit {
257296
requestToLaunch: 'unknown',
258297
latestStatus: 'unknown',
259298
requestToTerminate: 'unknown',
299+
requestToReconfigure: 'unknown',
300+
reconfigureComplete: 'unknown',
260301
};
261302
instanceAuditResponseList.push(instanceAuditResponse);
262303
});
@@ -267,13 +308,19 @@ export default class Audit {
267308
)) {
268309
switch (instanceAudit.type) {
269310
case 'request-to-launch':
270-
instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toUTCString();
311+
instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toISOString();
271312
break;
272313
case 'request-to-terminate':
273-
instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toUTCString();
314+
instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toISOString();
315+
break;
316+
case 'request-to-reconfigure':
317+
instanceAuditResponse.requestToReconfigure = new Date(instanceAudit.timestamp).toISOString();
318+
break;
319+
case 'reconfigure-complete':
320+
instanceAuditResponse.reconfigureComplete = new Date(instanceAudit.timestamp).toISOString();
274321
break;
275322
case 'latest-status':
276-
instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toUTCString();
323+
instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toISOString();
277324
instanceAuditResponse.latestStatusInfo = instanceAudit.state;
278325
break;
279326
}
@@ -289,17 +336,21 @@ export default class Audit {
289336
const groupAuditResponse: GroupAuditResponse = {
290337
lastLauncherRun: 'unknown',
291338
lastAutoScalerRun: 'unknown',
339+
lastReconfigureRequest: 'unknown',
292340
};
293341

294342
const autoScalerActionItems: AutoScalerActionItem[] = [];
295343
const launcherActionItems: LauncherActionItem[] = [];
296344
for (const groupAudit of groupAudits) {
297345
switch (groupAudit.type) {
298346
case 'last-launcher-run':
299-
groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toUTCString();
347+
groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toISOString();
300348
break;
301349
case 'last-autoScaler-run':
302-
groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toUTCString();
350+
groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toISOString();
351+
break;
352+
case 'last-reconfigure-request':
353+
groupAuditResponse.lastReconfigureRequest = new Date(groupAudit.timestamp).toISOString();
303354
break;
304355
case 'launcher-action-item':
305356
launcherActionItems.push(groupAudit.launcherActionItem);
@@ -312,12 +363,12 @@ export default class Audit {
312363
autoScalerActionItems
313364
.sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1))
314365
.map(function (key) {
315-
key.timestamp = new Date(key.timestamp).toUTCString();
366+
key.timestamp = new Date(key.timestamp).toISOString();
316367
});
317368
launcherActionItems
318369
.sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1))
319370
.map(function (key) {
320-
key.timestamp = new Date(key.timestamp).toUTCString();
371+
key.timestamp = new Date(key.timestamp).toISOString();
321372
});
322373

323374
groupAuditResponse.autoScalerActionItems = autoScalerActionItems;

0 commit comments

Comments
 (0)