-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllm-prompts-ci.js
More file actions
109 lines (99 loc) · 3.36 KB
/
llm-prompts-ci.js
File metadata and controls
109 lines (99 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env node
/**
* Package a prompt dataset for LLM fine-tuning and publish it via CI.
*
* The `prompts` layer is designed for exactly this use case: versioning prompt
* templates, instruction sets, and RLHF preference data alongside the model
* weights and training code they belong to. Storing everything in a single
* ModelKit keeps the fine-tuning run fully reproducible — you always know
* which prompts produced which checkpoint.
*
* Expected directory layout:
*
* fine-tune/
* ├── Kitfile (generated below if absent)
* ├── checkpoints/ model checkpoint directory
* ├── prompts/
* │ ├── system.txt system prompt template
* │ ├── instructions.jsonl instruction-following examples
* │ └── preferences.jsonl RLHF preference pairs
* ├── data/
* │ └── train.jsonl raw training corpus
* └── train.py training script
*
* Run:
* MODEL_VERSION=0.2.0 REGISTRY_USER=... REGISTRY_PASS=... node examples/llm-prompts-ci.js
*/
import { writeFile, access } from 'fs/promises';
import { stringify as toYaml } from 'yaml';
import { login, pack, push, logout } from '../dist/index.js';
// Registry coordinates come from the CI environment. Credentials are mandatory
// (the job fails fast without them); host and version fall back to defaults.
const registry = process.env.REGISTRY ?? 'registry.example.com';
const user = requireEnv('REGISTRY_USER');
const pass = requireEnv('REGISTRY_PASS');
const version = process.env.MODEL_VERSION ?? 'latest';
// Only prefix real versions with 'v'. The 'latest' fallback must map to the
// conventional ':latest' tag, not the nonsensical ':vlatest'.
const ref = version === 'latest'
  ? `${registry}/org/llm-finetune:latest`
  : `${registry}/org/llm-finetune:v${version}`;
const workdir = './fine-tune';
// Write a Kitfile on the fly unless the repository already ships one.
const kitfilePath = `${workdir}/Kitfile`;
let hasKitfile = true;
try {
  await access(kitfilePath);
} catch {
  hasKitfile = false;
}
if (!hasKitfile) {
  // Everything that shaped the model's behaviour goes into the prompts layer:
  // system prompts, instruction templates, and RLHF preference data.
  const promptLayers = [
    {
      path: './prompts/system.txt',
      description: 'System prompt template used during fine-tuning',
    },
    {
      path: './prompts/instructions.jsonl',
      description: 'Instruction-following examples (ShareGPT format)',
    },
    {
      path: './prompts/preferences.jsonl',
      description: 'RLHF preference pairs for DPO training',
    },
  ];
  const kitfile = {
    manifestVersion: '1.0.0',
    package: {
      name: 'llm-finetune',
      version,
      description: 'Fine-tuning run with versioned prompt dataset',
      authors: ['AI Team'],
    },
    model: {
      name: 'base-checkpoint',
      path: './checkpoints',
      description: 'Latest training checkpoint',
    },
    prompts: promptLayers,
    datasets: [
      {
        name: 'training-corpus',
        path: './data/train.jsonl',
        description: 'Raw pre-training corpus used for continued pre-training',
      },
    ],
    code: [
      { path: './train.py', description: 'Fine-tuning entry point' },
    ],
  };
  await writeFile(kitfilePath, toYaml(kitfile, { lineWidth: 0 }));
  console.log('Kitfile generated.');
}
// Authenticate, publish the ModelKit, and always drop the registry session so
// CI credentials never outlive the job — even when pack/push throws.
await login(registry, user, pass);
try {
  const report = (stage) => console.log(`${stage}: ${ref}`);
  await pack(workdir, { tag: ref });
  report('Packed');
  await push(ref);
  report('Pushed');
} finally {
  await logout(registry);
}
/**
 * Read a required environment variable for the CI run.
 *
 * @param {string} name - Environment variable to look up.
 * @returns {string} The variable's (non-empty) value.
 *
 * Exits the whole process with status 1 when the variable is unset or empty —
 * in a CI context a clear one-line error beats a stack trace.
 */
function requireEnv(name) {
  const { [name]: value } = process.env;
  if (value) {
    return value;
  }
  console.error(`Missing required environment variable: ${name}`);
  process.exit(1);
}