Skip to content

Commit e0ca04e

Browse files
committed
feat: add vrm export
1 parent 755d692 commit e0ca04e

2 files changed

Lines changed: 178 additions & 108 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,5 @@ public/unity/Build/aang-renderer.data
3737
public/unity/Build/aang-renderer.framework.js
3838
public/unity/Build/aang-renderer.symbols.json
3939
public/unity/Build/aang-renderer.wasm
40+
41+
tmp

src/lib/babylon/export.ts

Lines changed: 176 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,67 @@
11
import { AbstractMesh, Bone, Matrix, Quaternion, Scene, Skeleton, TransformNode, Vector3 } from '@babylonjs/core'
22
import { GLTF2Export } from '@babylonjs/serializers/glTF'
33

4-
// Maps DCL avatar bone names to VRM 0.x humanoid bone names
4+
// Maps DCL avatar bone names to VRM 0.x humanoid bone names.
5+
//
6+
// IMPORTANT: DCL's rig uses the opposite handedness convention from VRM 0.x —
7+
// Avatar_LeftArm sits at world +X, whereas VRM 0.x expects the leftUpperArm to
8+
// be at -X (avatar faces +Z, its own left side is -X). To make VRM animations
9+
// apply to the correct side of the body, we map each DCL "Left" bone to the
10+
// VRM "right" slot, and vice versa. The bone names look misleading but the
11+
// rotation directions end up correct in any compliant VRM viewer.
512
const DCL_TO_VRM_HUMANOID: Record<string, string> = {
613
Avatar_Hips: 'hips',
714
Avatar_Spine: 'spine',
815
Avatar_Spine1: 'chest',
916
Avatar_Spine2: 'upperChest',
1017
Avatar_Neck: 'neck',
1118
Avatar_Head: 'head',
12-
Avatar_LeftShoulder: 'leftShoulder',
13-
Avatar_LeftArm: 'leftUpperArm',
14-
Avatar_LeftForeArm: 'leftLowerArm',
15-
Avatar_LeftHand: 'leftHand',
16-
Avatar_RightShoulder: 'rightShoulder',
17-
Avatar_RightArm: 'rightUpperArm',
18-
Avatar_RightForeArm: 'rightLowerArm',
19-
Avatar_RightHand: 'rightHand',
20-
Avatar_LeftUpLeg: 'leftUpperLeg',
21-
Avatar_LeftLeg: 'leftLowerLeg',
22-
Avatar_LeftFoot: 'leftFoot',
23-
Avatar_LeftToeBase: 'leftToes',
24-
Avatar_RightUpLeg: 'rightUpperLeg',
25-
Avatar_RightLeg: 'rightLowerLeg',
26-
Avatar_RightFoot: 'rightFoot',
27-
Avatar_RightToeBase: 'rightToes',
28-
Avatar_LeftHandThumb1: 'leftThumbProximal',
29-
Avatar_LeftHandThumb2: 'leftThumbIntermediate',
30-
Avatar_LeftHandThumb3: 'leftThumbDistal',
31-
Avatar_LeftHandIndex1: 'leftIndexProximal',
32-
Avatar_LeftHandIndex2: 'leftIndexIntermediate',
33-
Avatar_LeftHandIndex3: 'leftIndexDistal',
34-
Avatar_LeftHandMiddle1: 'leftMiddleProximal',
35-
Avatar_LeftHandMiddle2: 'leftMiddleIntermediate',
36-
Avatar_LeftHandMiddle3: 'leftMiddleDistal',
37-
Avatar_LeftHandRing1: 'leftRingProximal',
38-
Avatar_LeftHandRing2: 'leftRingIntermediate',
39-
Avatar_LeftHandRing3: 'leftRingDistal',
40-
Avatar_LeftHandPinky1: 'leftLittleProximal',
41-
Avatar_LeftHandPinky2: 'leftLittleIntermediate',
42-
Avatar_LeftHandPinky3: 'leftLittleDistal',
43-
Avatar_RightHandThumb1: 'rightThumbProximal',
44-
Avatar_RightHandThumb2: 'rightThumbIntermediate',
45-
Avatar_RightHandThumb3: 'rightThumbDistal',
46-
Avatar_RightHandIndex1: 'rightIndexProximal',
47-
Avatar_RightHandIndex2: 'rightIndexIntermediate',
48-
Avatar_RightHandIndex3: 'rightIndexDistal',
49-
Avatar_RightHandMiddle1: 'rightMiddleProximal',
50-
Avatar_RightHandMiddle2: 'rightMiddleIntermediate',
51-
Avatar_RightHandMiddle3: 'rightMiddleDistal',
52-
Avatar_RightHandRing1: 'rightRingProximal',
53-
Avatar_RightHandRing2: 'rightRingIntermediate',
54-
Avatar_RightHandRing3: 'rightRingDistal',
55-
Avatar_RightHandPinky1: 'rightLittleProximal',
56-
Avatar_RightHandPinky2: 'rightLittleIntermediate',
57-
Avatar_RightHandPinky3: 'rightLittleDistal',
19+
Avatar_LeftShoulder: 'rightShoulder',
20+
Avatar_LeftArm: 'rightUpperArm',
21+
Avatar_LeftForeArm: 'rightLowerArm',
22+
Avatar_LeftHand: 'rightHand',
23+
Avatar_RightShoulder: 'leftShoulder',
24+
Avatar_RightArm: 'leftUpperArm',
25+
Avatar_RightForeArm: 'leftLowerArm',
26+
Avatar_RightHand: 'leftHand',
27+
Avatar_LeftUpLeg: 'rightUpperLeg',
28+
Avatar_LeftLeg: 'rightLowerLeg',
29+
Avatar_LeftFoot: 'rightFoot',
30+
Avatar_LeftToeBase: 'rightToes',
31+
Avatar_RightUpLeg: 'leftUpperLeg',
32+
Avatar_RightLeg: 'leftLowerLeg',
33+
Avatar_RightFoot: 'leftFoot',
34+
Avatar_RightToeBase: 'leftToes',
35+
Avatar_LeftHandThumb1: 'rightThumbProximal',
36+
Avatar_LeftHandThumb2: 'rightThumbIntermediate',
37+
Avatar_LeftHandThumb3: 'rightThumbDistal',
38+
Avatar_LeftHandIndex1: 'rightIndexProximal',
39+
Avatar_LeftHandIndex2: 'rightIndexIntermediate',
40+
Avatar_LeftHandIndex3: 'rightIndexDistal',
41+
Avatar_LeftHandMiddle1: 'rightMiddleProximal',
42+
Avatar_LeftHandMiddle2: 'rightMiddleIntermediate',
43+
Avatar_LeftHandMiddle3: 'rightMiddleDistal',
44+
Avatar_LeftHandRing1: 'rightRingProximal',
45+
Avatar_LeftHandRing2: 'rightRingIntermediate',
46+
Avatar_LeftHandRing3: 'rightRingDistal',
47+
Avatar_LeftHandPinky1: 'rightLittleProximal',
48+
Avatar_LeftHandPinky2: 'rightLittleIntermediate',
49+
Avatar_LeftHandPinky3: 'rightLittleDistal',
50+
Avatar_RightHandThumb1: 'leftThumbProximal',
51+
Avatar_RightHandThumb2: 'leftThumbIntermediate',
52+
Avatar_RightHandThumb3: 'leftThumbDistal',
53+
Avatar_RightHandIndex1: 'leftIndexProximal',
54+
Avatar_RightHandIndex2: 'leftIndexIntermediate',
55+
Avatar_RightHandIndex3: 'leftIndexDistal',
56+
Avatar_RightHandMiddle1: 'leftMiddleProximal',
57+
Avatar_RightHandMiddle2: 'leftMiddleIntermediate',
58+
Avatar_RightHandMiddle3: 'leftMiddleDistal',
59+
Avatar_RightHandRing1: 'leftRingProximal',
60+
Avatar_RightHandRing2: 'leftRingIntermediate',
61+
Avatar_RightHandRing3: 'leftRingDistal',
62+
Avatar_RightHandPinky1: 'leftLittleProximal',
63+
Avatar_RightHandPinky2: 'leftLittleIntermediate',
64+
Avatar_RightHandPinky3: 'leftLittleDistal',
5865
}
5966

6067
function readGLBChunks(buffer: ArrayBuffer): { json: any; binChunk: ArrayBuffer | null } {
@@ -106,53 +113,70 @@ function packGLB(json: any, binChunk: ArrayBuffer | null): ArrayBuffer {
106113
}
107114

108115
/**
109-
* Bakes the current visible pose (whatever the live preview shows) as the new
110-
* bind pose of the exported glTF. Without this, the .vrm encodes the rig's
111-
* authored bind pose (fingers spread, Mixamo-style feet) — which is what most
112-
* VRM viewers render when no animation is applied.
116+
* Rewrites bones into canonical form to match what well-behaved VRM exporters
117+
* (e.g. UniVRM) produce: each bone-node has identity rotation, unit scale,
118+
* and a parent-relative translation in world meters; each inverseBindMatrices
119+
* entry is the inverse of a pure translation matrix.
113120
*
114-
* For each bone in the skin, overwrites:
115-
* - the node's TRS with the bone's current local matrix
116-
* - the inverseBindMatrices accessor entry with inverse(current absolute)
121+
* The reference VRM we inspected had this layout exactly. DCL's rig comes out
122+
* of Babylon with weird non-canonical TRS (0.01 scale baked in, 180° rotations
123+
* on the basis vectors, translations at 100× meter scale). That layout renders
124+
* correctly at rest because the scale×rotation in jointWorld and the inverse
125+
* scale×rotation in IBM cancel out — but VRM animations rotate joints in their
126+
* local frame, so the baked rotations push arms backward, feet sideways, etc.
117127
*
118-
* Math sanity-check: at viewer rest, jointWorld_i × IBM_i must equal identity
119-
* so the mesh renders at its mesh-local vertex positions. Since we set
120-
* node_i.TRS = bone_i.localMatrix → jointWorld_i = bone_i.absoluteMatrix
121-
* IBM_i = inverse(bone_i.absoluteMatrix)
122-
* the product is identity by construction.
128+
* Algorithm:
129+
* 1. For each bone, read its world position from snap.absolute.getTranslation()
130+
* — Babylon stores skeleton-local positions there at meter scale.
131+
* 2. Set node.TRS = (worldPos − parentWorldPos, identity rotation, unit scale).
132+
* Root bones (no bone parent) just use worldPos directly.
133+
* 3. Set IBM = inverse(translate(worldPos)) = translate(−worldPos).
134+
*
135+
* Math sanity-check: at rest, jointWorld_i is a chain of pure translations =
136+
* translate(worldPos_i); IBM_i = translate(−worldPos_i); product = identity, so
137+
* the mesh renders at its stored mesh-local vertex positions (which Babylon
138+
* also outputs at meter scale).
123139
*/
124140
function rebakeBindPose(
125141
json: any,
126142
binChunk: ArrayBuffer | null,
127143
snapshotByName: Map<string, { local: Matrix; absolute: Matrix }>,
144+
boneParentNameByName: Map<string, string | null>,
128145
): void {
129146
if (!binChunk || !json.skins || !json.nodes || !json.accessors || !json.bufferViews) return
130147

131-
// 1) Overwrite each bone-node's TRS with the snapshot's local matrix.
148+
// World position per bone, extracted from the (skeleton-local) absolute matrix.
149+
// We rely on the skeleton's owner mesh being at identity (parent mesh is reset
150+
// earlier in exportVRM), so skeleton-local equals world.
151+
const worldPosByName = new Map<string, Vector3>()
152+
for (const [name, snap] of snapshotByName) {
153+
worldPosByName.set(name, snap.absolute.getTranslation())
154+
}
155+
132156
const boneNodeIndices = new Set<number>()
133157
for (const skin of json.skins) {
134158
for (const idx of skin.joints) boneNodeIndices.add(idx)
135159
}
136160

137-
const tmpScale = new Vector3()
138-
const tmpRotation = new Quaternion()
139-
const tmpTranslation = new Vector3()
161+
// 1) Canonical TRS per bone-node.
140162
for (const idx of boneNodeIndices) {
141163
const node = json.nodes[idx]
142164
if (!node?.name) continue
143-
const snap = snapshotByName.get(node.name)
144-
if (!snap) continue
165+
const myWorldPos = worldPosByName.get(node.name)
166+
if (!myWorldPos) continue
167+
168+
const parentName = boneParentNameByName.get(node.name)
169+
const parentWorldPos = parentName ? worldPosByName.get(parentName) : undefined
170+
const localPos = parentWorldPos ? myWorldPos.subtract(parentWorldPos) : myWorldPos
145171

146-
snap.local.decompose(tmpScale, tmpRotation, tmpTranslation)
147-
node.translation = tmpTranslation.asArray()
148-
node.rotation = tmpRotation.asArray()
149-
node.scale = tmpScale.asArray()
172+
node.translation = [localPos.x, localPos.y, localPos.z]
173+
node.rotation = [0, 0, 0, 1]
174+
node.scale = [1, 1, 1]
150175
delete node.matrix
151176
}
152177

153-
// 2) Overwrite each skin's inverseBindMatrices in-place. The accessor's data
154-
// is Float32 column-major 4x4 matrices packed back-to-back in the binary
155-
// chunk; we mutate via a Float32Array view over the underlying buffer.
178+
// 2) IBMs = inverse pure translation. We mutate the accessor's Float32 view
179+
// over the binary chunk in place — joint i's 4x4 matrix occupies the 16 floats starting at i * 16.
156180
for (const skin of json.skins) {
157181
if (skin.inverseBindMatrices === undefined) continue
158182
const accessor = json.accessors[skin.inverseBindMatrices]
@@ -165,9 +189,12 @@ function rebakeBindPose(
165189
const ibmView = new Float32Array(binChunk, totalOffset, skin.joints.length * 16)
166190
for (let i = 0; i < skin.joints.length; i++) {
167191
const jointNode = json.nodes[skin.joints[i]]
168-
const snap = jointNode?.name ? snapshotByName.get(jointNode.name) : undefined
169-
if (!snap) continue
170-
Matrix.Invert(snap.absolute).copyToArray(ibmView, i * 16)
192+
const worldPos = jointNode?.name ? worldPosByName.get(jointNode.name) : undefined
193+
if (!worldPos) {
194+
Matrix.Identity().copyToArray(ibmView, i * 16)
195+
continue
196+
}
197+
Matrix.Translation(-worldPos.x, -worldPos.y, -worldPos.z).copyToArray(ibmView, i * 16)
171198
}
172199
}
173200
}
@@ -203,40 +230,71 @@ function mergeSkeletons(json: any): void {
203230
}
204231

205232
/**
206-
* Wraps all top-level scene nodes in a new "VRMRoot" node with a corrective
207-
* rotation. This is applied AFTER Babylon's serializer has done its handedness
208-
* conversion, so it's purely additive — we're not fighting the serializer,
209-
* just rotating the final result.
233+
* Restructures the glTF JSON to match the canonical layout that UniVRM (and
234+
* other clean exporters) produce:
235+
* scene roots = [hips, meshes, secondary]
236+
* Avatar_Hips becomes a top-level skeleton root, all mesh nodes get grouped
237+
* under a transformless "meshes" container, and a "secondary" empty container
238+
* is added for VRM spring-bone secondary animations.
210239
*
211-
* VRM 0.x convention: avatar standing upright (Y-up), facing +Z.
212-
* If the exported avatar comes out upside-down or mirrored, adjust the
213-
* quaternion below empirically.
240+
* Why this matters: Babylon's serializer wraps everything inside the original
241+
* scene meshes (`parent`, `top`, `bottom`) and adds a 180° Y rotation to them
242+
* for left→right handedness conversion. Combined with our canonical rebake,
243+
* those wrappers leave the avatar at a position viewers don't auto-frame.
244+
* Flattening to the reference layout fixes positioning AND helps viewers
245+
* recognize the avatar as a standard humanoid VRM.
214246
*/
215-
function applyOrientationFix(json: any): void {
216-
if (!json.nodes || !json.scenes || json.scenes.length === 0) return
217-
218-
// 180° rotation around Y so the avatar faces the camera (+Z), matching the
219-
// VRM 0.x facing convention. Babylon exports the avatar facing -Z by default.
220-
const correctionRotation = [0, 1, 0, 0]
221-
222-
// Lift the avatar so feet sit on the viewer's ground plane. Tune if needed —
223-
// some viewers place their grid at chest/head height instead of Y=0.
224-
const correctionTranslation = [0, 1.8, 0]
225-
226-
const scene = json.scenes[0]
227-
const originalRootIndices = [...scene.nodes]
228-
229-
// Create the new wrapper node
230-
const wrapperIndex = json.nodes.length
231-
json.nodes.push({
232-
name: 'VRMRoot',
233-
rotation: correctionRotation,
234-
translation: correctionTranslation,
235-
children: originalRootIndices,
236-
})
247+
function restructureForVrm(json: any): void {
248+
if (!json.skins || !json.nodes || !json.scenes || json.scenes.length === 0) return
249+
250+
// Find Avatar_Hips — this becomes the new scene-root skeleton.
251+
let hipsIdx = -1
252+
for (let i = 0; i < json.nodes.length; i++) {
253+
if (json.nodes[i].name === 'Avatar_Hips') {
254+
hipsIdx = i
255+
break
256+
}
257+
}
258+
if (hipsIdx < 0) return
259+
260+
// All mesh-bearing nodes — they get moved into the new "meshes" container.
261+
const meshNodeIndices: number[] = []
262+
for (let i = 0; i < json.nodes.length; i++) {
263+
if (json.nodes[i].mesh !== undefined) meshNodeIndices.push(i)
264+
}
237265

238-
// Replace scene roots with just the wrapper
239-
scene.nodes = [wrapperIndex]
266+
// Detach hips and mesh nodes from any current parent.
267+
const detached = new Set<number>([hipsIdx, ...meshNodeIndices])
268+
for (let i = 0; i < json.nodes.length; i++) {
269+
const node = json.nodes[i]
270+
if (node.children && node.children.length) {
271+
node.children = node.children.filter((c: number) => !detached.has(c))
272+
if (node.children.length === 0) delete node.children
273+
}
274+
}
275+
276+
// Strip transforms from mesh nodes — skinned meshes ignore their own
277+
// transform per glTF spec, and the reference VRM leaves them unset.
278+
for (const m of meshNodeIndices) {
279+
delete json.nodes[m].translation
280+
delete json.nodes[m].rotation
281+
delete json.nodes[m].scale
282+
delete json.nodes[m].matrix
283+
}
284+
285+
// Create the two extra scene roots.
286+
const meshesIdx = json.nodes.length
287+
json.nodes.push({ name: 'meshes', children: meshNodeIndices })
288+
const secondaryIdx = json.nodes.length
289+
json.nodes.push({ name: 'secondary' })
290+
291+
// New scene roots: hips, meshes, secondary (mirrors the juanma reference VRM exactly).
292+
json.scenes[0].nodes = [hipsIdx, meshesIdx, secondaryIdx]
293+
294+
// Point every skin's skeleton at the new hips root.
295+
for (const skin of json.skins) {
296+
if (skin.skeleton !== undefined) skin.skeleton = hipsIdx
297+
}
240298
}
241299

242300
function injectVRMExtension(json: any): void {
@@ -396,6 +454,16 @@ export async function exportVRM(scene: Scene): Promise<Blob> {
396454
}
397455
}
398456

457+
// Map of bone name → parent bone name (using Babylon's bone hierarchy, which
458+
// is the source of truth for absolute transforms). rebakeBindPose uses this
459+
// to compute parent-relative translations in canonical form.
460+
const boneParentNameByName = new Map<string, string | null>()
461+
for (const snap of boneSnapshots) {
462+
if (boneParentNameByName.has(snap.bone.name)) continue
463+
const parent = snap.bone.getParent()
464+
boneParentNameByName.set(snap.bone.name, parent ? parent.name : null)
465+
}
466+
399467
try {
400468
const glbData = await GLTF2Export.GLBAsync(scene, 'avatar', {
401469
shouldExportNode: (node) => {
@@ -410,9 +478,9 @@ export async function exportVRM(scene: Scene): Promise<Blob> {
410478
const buffer = await glbBlob.arrayBuffer()
411479
const { json, binChunk } = readGLBChunks(buffer)
412480

413-
rebakeBindPose(json, binChunk, snapshotByName)
481+
rebakeBindPose(json, binChunk, snapshotByName, boneParentNameByName)
414482
mergeSkeletons(json)
415-
applyOrientationFix(json)
483+
restructureForVrm(json)
416484
injectVRMExtension(json)
417485

418486
return new Blob([packGLB(json, binChunk)], { type: 'application/octet-stream' })

0 commit comments

Comments
 (0)