Skip to content

Commit b5d2b8b

Browse files
authored
Merge pull request #153 from dferguson992/main
feat: add --auto-prompt mode and rename managed-inference to realtime…
2 parents 9568942 + e227c0c commit b5d2b8b

57 files changed

Lines changed: 1131 additions & 488 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 298 additions & 62 deletions
Large diffs are not rendered by default.

bin/cli.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ program
2727

2828
// --- General ---
2929
.addOption(new Option('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
30-
.addOption(new Option('--config <path>', 'Path to configuration file'))
30+
.addOption(new Option('--auto-prompt', 'Fill defaults, prompt only for missing required values'))
31+
.addOption(new Option('--config <path>', 'Path to JSON configuration file'))
3132
.addOption(new Option('--project-name <name>', 'Project name'))
3233
.addOption(new Option('--project-dir <dir>', 'Output directory path'))
3334
.addOption(new Option('--force', 'Overwrite existing output directory without prompting'))
@@ -41,7 +42,7 @@ program
4142
.addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
4243

4344
// --- Build & Infrastructure ---
44-
.addOption(new Option('--deployment-target <target>', 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)'))
45+
.addOption(new Option('--deployment-target <target>', 'Deployment target (realtime-inference, async-inference, batch-transform, hyperpod-eks)'))
4546
.addOption(new Option('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
4647
.addOption(new Option('--region <region>', 'AWS region'))
4748
.addOption(new Option('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
@@ -154,7 +155,7 @@ program.configureHelp({
154155

155156
for (const opt of allOptions) {
156157
const long = opt.long || '';
157-
if (['--skip-prompts', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
158+
if (['--skip-prompts', '--auto-prompt', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
158159
groups.general.push(opt);
159160
} else if (['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'].includes(long)) {
160161
groups.model.push(opt);

config/parameter-schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"schemaVersion": "1.0.0",
33
"deploymentTargets": {
4-
"managed-inference": {
4+
"realtime-inference": {
55
"endpoint": {
66
"initialInstanceCount": {
77
"type": "integer",

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@aws/ml-container-creator",
3-
"version": "0.2.3",
3+
"version": "0.2.4",
44
"description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
55
"type": "module",
66
"main": "src/app.js",

src/app.js

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,22 @@ export async function run(projectName, options) {
156156
console.log(' If your model package lacks an InferenceSpecification, use the S3 path');
157157
console.log(' directly instead: --model-name="s3://bucket/path/model.tar.gz"\n');
158158
}
159+
} else if (configManager.isAutoPrompt()) {
160+
// Auto-prompt mode: run the wizard with all resolved values pre-filled.
161+
// The wizard skips prompts for values already in explicitConfig and
162+
// uses phase-level gates to skip irrelevant sections entirely.
163+
// This gives context-aware prompting (correct MCP queries, filtered choices)
164+
// while only asking for what's truly missing.
165+
console.log('\n🔄 Auto-prompt mode — prompting only for missing values with full context');
166+
167+
const promptRunner = new PromptRunner({
168+
configManager,
169+
options: kebabOptions,
170+
registryConfigManager,
171+
baseConfig
172+
});
173+
const promptAnswers = await promptRunner.run();
174+
answers = configManager.getFinalConfiguration(promptAnswers);
159175
} else {
160176
const promptRunner = new PromptRunner({
161177
configManager,
@@ -482,7 +498,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
482498
testTypes: [],
483499
buildTimestamp: new Date().toISOString(),
484500
buildTarget: 'codebuild',
485-
deploymentTarget: 'managed-inference',
501+
deploymentTarget: 'realtime-inference',
486502
hyperPodCluster: null,
487503
hyperPodNamespace: 'default',
488504
hyperPodReplicas: 1,

src/lib/auto-prompt-builder.js

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
/**
5+
* Auto-Prompt Builder — generates targeted prompts for missing required parameters.
6+
*
7+
* Used by --auto-prompt mode to ask only for values that cannot be inferred
8+
* or defaulted from the provided CLI flags.
9+
*/
10+
11+
/**
 * Builds a minimal set of prompts for the given missing parameters.
 * Each prompt is self-contained and doesn't depend on multi-phase wizard state.
 *
 * Parameters with a dedicated entry in PROMPT_BUILDERS get that builder's
 * prompt (a builder may return a falsy value to suppress its prompt).
 * Any other parameter gets a generic free-text `input` prompt.
 *
 * @param {string[]} [missingParams=[]] - Parameter names that need values
 * @param {object} [currentConfig={}] - Current configuration (with defaults filled)
 * @returns {Array} Array of prompt objects compatible with runPrompts()
 */
export function buildAutoPrompts(missingParams = [], currentConfig = {}) {
    const prompts = [];

    for (const param of missingParams) {
        // Only consult builders defined directly on PROMPT_BUILDERS. A plain
        // `PROMPT_BUILDERS[param]` lookup would also resolve inherited members
        // such as `constructor` or `__proto__`, producing a bogus prompt or a
        // TypeError for parameters that merely share a name with them.
        const hasBuilder = Object.prototype.hasOwnProperty.call(PROMPT_BUILDERS, param)
            && typeof PROMPT_BUILDERS[param] === 'function';

        if (!hasBuilder) {
            // Fallback: generic text input for unknown parameters
            prompts.push({
                type: 'input',
                name: param,
                message: `Enter value for ${param}:`
            });
            continue;
        }

        const prompt = PROMPT_BUILDERS[param](currentConfig);
        if (prompt) {
            prompts.push(prompt);
        }
    }

    return prompts;
}
41+
42+
/**
 * Map of parameter names to prompt builder functions.
 * Each builder receives the current config and returns a prompt object
 * with `type`, `name`, `message` and `choices` fields.
 *
 * Builders that read the config (`instanceType`, `modelFormat`) tolerate an
 * omitted config and fall back to their defaults ('http' architecture,
 * 'sklearn' engine). Choice arrays are rebuilt on every call, so callers
 * never share mutable state between prompts.
 */
const PROMPT_BUILDERS = {
    deploymentConfig: (_config) => ({
        type: 'list',
        name: 'deploymentConfig',
        message: 'Select deployment configuration:',
        choices: [
            { type: 'separator', separator: '── Large Language Models ──' },
            { name: 'Transformers with vLLM', value: 'transformers-vllm' },
            { name: 'Transformers with SGLang', value: 'transformers-sglang' },
            { name: 'Transformers with TensorRT-LLM', value: 'transformers-tensorrt-llm' },
            { name: 'Transformers with LMI', value: 'transformers-lmi' },
            { name: 'Transformers with DJL', value: 'transformers-djl' },
            { type: 'separator', separator: '── HTTP Serving ──' },
            { name: 'HTTP with Flask', value: 'http-flask' },
            { name: 'HTTP with FastAPI', value: 'http-fastapi' },
            { type: 'separator', separator: '── NVIDIA Triton ──' },
            { name: 'Triton FIL (XGBoost, LightGBM)', value: 'triton-fil' },
            { name: 'Triton ONNX Runtime', value: 'triton-onnxruntime' },
            { name: 'Triton TensorFlow', value: 'triton-tensorflow' },
            { name: 'Triton PyTorch', value: 'triton-pytorch' },
            { name: 'Triton vLLM', value: 'triton-vllm' },
            { name: 'Triton TensorRT-LLM', value: 'triton-tensorrtllm' },
            { name: 'Triton Python Backend', value: 'triton-python' },
            { type: 'separator', separator: '── Diffusion Models ──' },
            { name: 'Diffusors with vLLM Omni', value: 'diffusors-vllm-omni' }
        ]
    }),

    instanceType: (config = {}) => {
        // An absent/empty architecture is treated as plain HTTP serving (CPU list).
        const architecture = config.architecture || 'http';
        const isGpu = architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors';

        const gpuChoices = [
            { name: 'ml.g5.xlarge (1× A10G 24GB — small LLMs)', value: 'ml.g5.xlarge' },
            { name: 'ml.g5.2xlarge (1× A10G 24GB — medium LLMs)', value: 'ml.g5.2xlarge' },
            { name: 'ml.g5.4xlarge (1× A10G 24GB — larger models)', value: 'ml.g5.4xlarge' },
            { name: 'ml.g5.12xlarge (4× A10G 96GB — large LLMs)', value: 'ml.g5.12xlarge' },
            { name: 'ml.g5.48xlarge (8× A10G 192GB — very large)', value: 'ml.g5.48xlarge' },
            { name: 'ml.g6.xlarge (1× L4 24GB)', value: 'ml.g6.xlarge' },
            { name: 'ml.g6.2xlarge (1× L4 24GB)', value: 'ml.g6.2xlarge' },
            { name: 'ml.p4d.24xlarge (8× A100 320GB)', value: 'ml.p4d.24xlarge' },
            { name: 'ml.p5.48xlarge (8× H100 640GB)', value: 'ml.p5.48xlarge' },
            { name: 'Custom (enter manually)', value: '_custom' }
        ];

        const cpuChoices = [
            { name: 'ml.m5.large (2 vCPU, 8GB — lightweight)', value: 'ml.m5.large' },
            { name: 'ml.m5.xlarge (4 vCPU, 16GB — small models)', value: 'ml.m5.xlarge' },
            { name: 'ml.m5.2xlarge (8 vCPU, 32GB — medium models)', value: 'ml.m5.2xlarge' },
            { name: 'ml.m5.4xlarge (16 vCPU, 64GB — large models)', value: 'ml.m5.4xlarge' },
            { name: 'ml.c5.xlarge (4 vCPU, 8GB — compute-heavy)', value: 'ml.c5.xlarge' },
            { name: 'ml.c5.2xlarge (8 vCPU, 16GB — compute-heavy)', value: 'ml.c5.2xlarge' },
            { name: 'Custom (enter manually)', value: '_custom' }
        ];

        return {
            type: 'list',
            name: 'instanceType',
            message: `Select instance type${isGpu ? ' (GPU recommended for this architecture)' : ''}:`,
            choices: isGpu ? gpuChoices : cpuChoices
        };
    },

    deploymentTarget: (_config) => ({
        type: 'list',
        name: 'deploymentTarget',
        message: 'Select deployment target:',
        choices: [
            { name: 'Real-Time Inference', value: 'realtime-inference' },
            { name: 'Async Inference', value: 'async-inference' },
            { name: 'Batch Transform', value: 'batch-transform' },
            { name: 'HyperPod EKS', value: 'hyperpod-eks' }
        ]
    }),

    modelFormat: (config = {}) => {
        const engine = config.engine || 'sklearn';
        const formatMap = {
            sklearn: [
                { name: 'pkl (pickle)', value: 'pkl' },
                { name: 'joblib', value: 'joblib' }
            ],
            xgboost: [
                { name: 'json', value: 'json' },
                { name: 'model (binary)', value: 'model' },
                { name: 'ubj (universal binary JSON)', value: 'ubj' }
            ],
            tensorflow: [
                { name: 'keras', value: 'keras' },
                { name: 'h5', value: 'h5' },
                { name: 'SavedModel', value: 'SavedModel' }
            ]
        };

        // Own-property lookup: an engine name such as 'constructor' must fall
        // back to the sklearn formats instead of resolving to an inherited
        // Object.prototype member and being handed to the prompt as `choices`.
        const isKnownEngine = Object.prototype.hasOwnProperty.call(formatMap, engine);
        const choices = isKnownEngine ? formatMap[engine] : formatMap.sklearn;

        return {
            type: 'list',
            name: 'modelFormat',
            message: `Select model format for ${engine}:`,
            choices
        };
    },

    awsRegion: (_config) => ({
        type: 'list',
        name: 'awsRegion',
        message: 'Select AWS region:',
        choices: [
            { name: 'us-east-1 (N. Virginia)', value: 'us-east-1' },
            { name: 'us-west-2 (Oregon)', value: 'us-west-2' },
            { name: 'eu-west-1 (Ireland)', value: 'eu-west-1' },
            { name: 'ap-northeast-1 (Tokyo)', value: 'ap-northeast-1' },
            { name: 'ap-southeast-1 (Singapore)', value: 'ap-southeast-1' },
            { name: 'Custom (enter manually)', value: '_custom' }
        ]
    }),

    buildTarget: (_config) => ({
        type: 'list',
        name: 'buildTarget',
        message: 'Select build target:',
        choices: [
            { name: 'CodeBuild (recommended)', value: 'codebuild' }
        ]
    })
};

src/lib/ci-register-helpers.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import { createHash } from 'node:crypto';
2525
* @param {string} modelName - e.g. "meta-llama/Llama-2-7b-chat-hf", defaults to "none"
2626
* @param {string} instanceType - e.g. "ml.g5.xlarge"
2727
* @param {string} region - e.g. "us-east-1"
28-
* @param {string} deploymentTarget - e.g. "managed-inference"
28+
* @param {string} deploymentTarget - e.g. "realtime-inference"
2929
* @returns {string} 16-character lowercase hex string
3030
*/
3131
export function computeConfigId(deploymentConfig, modelName, instanceType, region, deploymentTarget) {

src/lib/cli-handler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ CLI OPTIONS:
190190
--instance-type=<type> SageMaker instance type (e.g., ml.m5.large, ml.g5.xlarge)
191191
--region=<region> AWS region
192192
--role-arn=<arn> AWS IAM role ARN for SageMaker execution
193-
--deployment-target=<target> Deployment target (managed-inference|hyperpod-eks)
193+
--deployment-target=<target> Deployment target (realtime-inference|async-inference|batch-transform|hyperpod-eks)
194194
--hyperpod-cluster=<name> HyperPod EKS cluster name
195195
--hyperpod-namespace=<ns> Kubernetes namespace for HyperPod (default: default)
196196
--hyperpod-replicas=<n> Number of replicas for HyperPod (default: 1)

0 commit comments

Comments
 (0)