11import { AbstractMesh , Bone , Matrix , Quaternion , Scene , Skeleton , TransformNode , Vector3 } from '@babylonjs/core'
22import { GLTF2Export } from '@babylonjs/serializers/glTF'
33
// Maps DCL avatar bone names to VRM 0.x humanoid bone names.
//
// IMPORTANT: DCL's rig uses an opposite handedness convention from VRM 0.x —
// Avatar_LeftArm sits at world +X, whereas VRM 0.x expects the leftUpperArm to
// be at -X (avatar faces +Z, its own left side is -X). To make VRM animations
// apply to the correct side of the body, each DCL "Left" bone is mapped to the
// VRM "right" slot, and vice versa. The pairing looks wrong at first glance,
// but it is intentional: do not "fix" it back to left→left.
//
// Values must be the exact VRM humanoid bone identifiers (camelCase, no
// surrounding whitespace) — viewers match them literally.
const DCL_TO_VRM_HUMANOID: Record<string, string> = {
  // Torso and head (no left/right, so no swap).
  Avatar_Hips: 'hips',
  Avatar_Spine: 'spine',
  Avatar_Spine1: 'chest',
  Avatar_Spine2: 'upperChest',
  Avatar_Neck: 'neck',
  Avatar_Head: 'head',

  // Arms (DCL left → VRM right, DCL right → VRM left).
  Avatar_LeftShoulder: 'rightShoulder',
  Avatar_LeftArm: 'rightUpperArm',
  Avatar_LeftForeArm: 'rightLowerArm',
  Avatar_LeftHand: 'rightHand',
  Avatar_RightShoulder: 'leftShoulder',
  Avatar_RightArm: 'leftUpperArm',
  Avatar_RightForeArm: 'leftLowerArm',
  Avatar_RightHand: 'leftHand',

  // Legs.
  Avatar_LeftUpLeg: 'rightUpperLeg',
  Avatar_LeftLeg: 'rightLowerLeg',
  Avatar_LeftFoot: 'rightFoot',
  Avatar_LeftToeBase: 'rightToes',
  Avatar_RightUpLeg: 'leftUpperLeg',
  Avatar_RightLeg: 'leftLowerLeg',
  Avatar_RightFoot: 'leftFoot',
  Avatar_RightToeBase: 'leftToes',

  // Fingers of the DCL left hand (VRM right hand). DCL "Pinky" is VRM "Little".
  Avatar_LeftHandThumb1: 'rightThumbProximal',
  Avatar_LeftHandThumb2: 'rightThumbIntermediate',
  Avatar_LeftHandThumb3: 'rightThumbDistal',
  Avatar_LeftHandIndex1: 'rightIndexProximal',
  Avatar_LeftHandIndex2: 'rightIndexIntermediate',
  Avatar_LeftHandIndex3: 'rightIndexDistal',
  Avatar_LeftHandMiddle1: 'rightMiddleProximal',
  Avatar_LeftHandMiddle2: 'rightMiddleIntermediate',
  Avatar_LeftHandMiddle3: 'rightMiddleDistal',
  Avatar_LeftHandRing1: 'rightRingProximal',
  Avatar_LeftHandRing2: 'rightRingIntermediate',
  Avatar_LeftHandRing3: 'rightRingDistal',
  Avatar_LeftHandPinky1: 'rightLittleProximal',
  Avatar_LeftHandPinky2: 'rightLittleIntermediate',
  Avatar_LeftHandPinky3: 'rightLittleDistal',

  // Fingers of the DCL right hand (VRM left hand).
  Avatar_RightHandThumb1: 'leftThumbProximal',
  Avatar_RightHandThumb2: 'leftThumbIntermediate',
  Avatar_RightHandThumb3: 'leftThumbDistal',
  Avatar_RightHandIndex1: 'leftIndexProximal',
  Avatar_RightHandIndex2: 'leftIndexIntermediate',
  Avatar_RightHandIndex3: 'leftIndexDistal',
  Avatar_RightHandMiddle1: 'leftMiddleProximal',
  Avatar_RightHandMiddle2: 'leftMiddleIntermediate',
  Avatar_RightHandMiddle3: 'leftMiddleDistal',
  Avatar_RightHandRing1: 'leftRingProximal',
  Avatar_RightHandRing2: 'leftRingIntermediate',
  Avatar_RightHandRing3: 'leftRingDistal',
  Avatar_RightHandPinky1: 'leftLittleProximal',
  Avatar_RightHandPinky2: 'leftLittleIntermediate',
  Avatar_RightHandPinky3: 'leftLittleDistal',
}
5966
6067function readGLBChunks ( buffer : ArrayBuffer ) : { json : any ; binChunk : ArrayBuffer | null } {
@@ -106,53 +113,70 @@ function packGLB(json: any, binChunk: ArrayBuffer | null): ArrayBuffer {
106113}
107114
108115/**
109- * Bakes the current visible pose (whatever the live preview shows) as the new
110- * bind pose of the exported glTF. Without this, the .vrm encodes the rig's
111- * authored bind pose (fingers spread, Mixamo-style feet) — which is what most
112- * VRM viewers render when no animation is applied .
116+ * Rewrites bones into canonical form to match what well-behaved VRM exporters
117+ * (e.g. UniVRM) produce: each bone-node has identity rotation, identity scale,
118+ * and a parent-relative translation in world meters; each inverseBindMatrices
119+ * entry is the inverse of a pure translation matrix .
113120 *
114- * For each bone in the skin, overwrites:
115- * - the node's TRS with the bone's current local matrix
116- * - the inverseBindMatrices accessor entry with inverse(current absolute)
121+ * The reference VRM we inspected had this layout exactly. DCL's rig comes out
122+ * of Babylon with weird non-canonical TRS (0.01 scale baked in, 180° rotations
123+ * on the basis vectors, translations at 100× meter scale). That layout renders
124+ * correctly at rest because the scale×rotation in jointWorld and the inverse
125+ * scale×rotation in IBM cancel out — but VRM animations rotate joints in their
126+ * local frame, so the baked rotations push arms backward, feet sideways, etc.
117127 *
118- * Math sanity-check: at viewer rest, jointWorld_i × IBM_i must equal identity
119- * so the mesh renders at its mesh-local vertex positions. Since we set
120- * node_i.TRS = bone_i.localMatrix → jointWorld_i = bone_i.absoluteMatrix
121- * IBM_i = inverse(bone_i.absoluteMatrix)
122- * the product is identity by construction.
128+ * Algorithm:
129+ * 1. For each bone, read its world position from snap.absolute.getTranslation()
130+ * — Babylon stores skeleton-local positions there at meter scale.
131+ * 2. Set node.TRS = (worldPos − parentWorldPos, identity rotation, unit scale).
132+ * Root bones (no bone parent) just use worldPos directly.
133+ * 3. Set IBM = inverse(translate(worldPos)) = translate(−worldPos).
134+ *
135+ * Math sanity-check: at rest, jointWorld_i is a chain of pure translations =
136+ * translate(worldPos_i); IBM_i = translate(−worldPos_i); product = identity, so
137+ * the mesh renders at its stored mesh-local vertex positions (which Babylon
138+ * also outputs at meter scale).
123139 */
124140function rebakeBindPose (
125141 json : any ,
126142 binChunk : ArrayBuffer | null ,
127143 snapshotByName : Map < string , { local : Matrix ; absolute : Matrix } > ,
144+ boneParentNameByName : Map < string , string | null > ,
128145) : void {
129146 if ( ! binChunk || ! json . skins || ! json . nodes || ! json . accessors || ! json . bufferViews ) return
130147
131- // 1) Overwrite each bone-node's TRS with the snapshot's local matrix.
148+ // World position per bone, extracted from the (skeleton-local) absolute matrix.
149+ // We rely on the skeleton's owner mesh being at identity (parent mesh is reset
150+ // earlier in exportVRM), so skeleton-local equals world.
151+ const worldPosByName = new Map < string , Vector3 > ( )
152+ for ( const [ name , snap ] of snapshotByName ) {
153+ worldPosByName . set ( name , snap . absolute . getTranslation ( ) )
154+ }
155+
132156 const boneNodeIndices = new Set < number > ( )
133157 for ( const skin of json . skins ) {
134158 for ( const idx of skin . joints ) boneNodeIndices . add ( idx )
135159 }
136160
137- const tmpScale = new Vector3 ( )
138- const tmpRotation = new Quaternion ( )
139- const tmpTranslation = new Vector3 ( )
161+ // 1) Canonical TRS per bone-node.
140162 for ( const idx of boneNodeIndices ) {
141163 const node = json . nodes [ idx ]
142164 if ( ! node ?. name ) continue
143- const snap = snapshotByName . get ( node . name )
144- if ( ! snap ) continue
165+ const myWorldPos = worldPosByName . get ( node . name )
166+ if ( ! myWorldPos ) continue
167+
168+ const parentName = boneParentNameByName . get ( node . name )
169+ const parentWorldPos = parentName ? worldPosByName . get ( parentName ) : undefined
170+ const localPos = parentWorldPos ? myWorldPos . subtract ( parentWorldPos ) : myWorldPos
145171
146- snap . local . decompose ( tmpScale , tmpRotation , tmpTranslation )
147- node . translation = tmpTranslation . asArray ( )
148- node . rotation = tmpRotation . asArray ( )
149- node . scale = tmpScale . asArray ( )
172+ node . translation = [ localPos . x , localPos . y , localPos . z ]
173+ node . rotation = [ 0 , 0 , 0 , 1 ]
174+ node . scale = [ 1 , 1 , 1 ]
150175 delete node . matrix
151176 }
152177
153- // 2) Overwrite each skin's inverseBindMatrices in-place. The accessor's data
154- // is Float32 column-major 4x4 matrices packed back-to-back in the binary
155- // chunk; we mutate via a Float32Array view over the underlying buffer.
178+ // 2) IBMs = inverse pure translation. We mutate the accessor's Float32 view
179+ // over the binary chunk in place — same layout we use to read GLB chunks.
156180 for ( const skin of json . skins ) {
157181 if ( skin . inverseBindMatrices === undefined ) continue
158182 const accessor = json . accessors [ skin . inverseBindMatrices ]
@@ -165,9 +189,12 @@ function rebakeBindPose(
165189 const ibmView = new Float32Array ( binChunk , totalOffset , skin . joints . length * 16 )
166190 for ( let i = 0 ; i < skin . joints . length ; i ++ ) {
167191 const jointNode = json . nodes [ skin . joints [ i ] ]
168- const snap = jointNode ?. name ? snapshotByName . get ( jointNode . name ) : undefined
169- if ( ! snap ) continue
170- Matrix . Invert ( snap . absolute ) . copyToArray ( ibmView , i * 16 )
192+ const worldPos = jointNode ?. name ? worldPosByName . get ( jointNode . name ) : undefined
193+ if ( ! worldPos ) {
194+ Matrix . Identity ( ) . copyToArray ( ibmView , i * 16 )
195+ continue
196+ }
197+ Matrix . Translation ( - worldPos . x , - worldPos . y , - worldPos . z ) . copyToArray ( ibmView , i * 16 )
171198 }
172199 }
173200}
@@ -203,40 +230,71 @@ function mergeSkeletons(json: any): void {
203230}
204231
/**
 * Restructures the glTF JSON in place to match the canonical layout that
 * UniVRM (and other clean exporters) produce:
 *
 *   scene roots = [hips, meshes, secondary]
 *
 * `Avatar_Hips` becomes a top-level skeleton root, all mesh-bearing nodes are
 * grouped under a transformless "meshes" container, and an empty "secondary"
 * container is added for VRM spring-bone secondary animations.
 *
 * Why this matters: Babylon's serializer wraps everything inside the original
 * scene meshes (`parent`, `top`, `bottom`) and adds a 180° Y rotation to them
 * for left→right handedness conversion. Combined with the canonical rebake,
 * those wrappers leave the avatar at a position viewers don't auto-frame.
 * Flattening to the reference layout fixes positioning and helps viewers
 * recognize the avatar as a standard humanoid VRM.
 *
 * The function is a no-op when the document has no skins, nodes or scenes, or
 * when no node named `Avatar_Hips` exists. Only `scenes[0]` is rewritten.
 *
 * @param json Parsed glTF JSON chunk; mutated in place.
 */
function restructureForVrm(json: any): void {
  if (!json.skins || !json.nodes || !json.scenes || json.scenes.length === 0) return

  const nodes: any[] = json.nodes

  // Avatar_Hips becomes the new scene-root skeleton.
  const hipsIdx = nodes.findIndex((n) => n.name === 'Avatar_Hips')
  if (hipsIdx < 0) return

  // All mesh-bearing nodes get moved into the new "meshes" container. The hips
  // node is excluded even if it carries a mesh: it is already a scene root, and
  // glTF forbids a node from being both a scene root and another node's child.
  // Excluding it also keeps its (rebaked) translation intact.
  const meshNodeIndices: number[] = []
  nodes.forEach((node, i) => {
    if (i !== hipsIdx && node.mesh !== undefined) meshNodeIndices.push(i)
  })

  // Detach hips and mesh nodes from whatever currently parents them.
  const detached = new Set<number>([hipsIdx, ...meshNodeIndices])
  for (const node of nodes) {
    if (!node.children || node.children.length === 0) continue
    const kept = node.children.filter((c: number) => !detached.has(c))
    // glTF requires `children` to be non-empty when present, so drop the
    // property entirely once everything has been detached.
    if (kept.length > 0) node.children = kept
    else delete node.children
  }

  // Strip transforms from mesh nodes — skinned meshes ignore their own
  // transform per the glTF spec, and the reference VRM leaves them unset.
  for (const m of meshNodeIndices) {
    const meshNode = nodes[m]
    delete meshNode.translation
    delete meshNode.rotation
    delete meshNode.scale
    delete meshNode.matrix
  }

  // Create the two extra scene roots. `children` is only emitted when there is
  // at least one mesh node, because an empty array fails glTF validation.
  const meshesIdx = nodes.length
  const meshesNode: { name: string; children?: number[] } = { name: 'meshes' }
  if (meshNodeIndices.length > 0) meshesNode.children = meshNodeIndices
  nodes.push(meshesNode)

  const secondaryIdx = nodes.length
  nodes.push({ name: 'secondary' })

  // New scene roots: hips, meshes, secondary (mirrors the reference VRM layout).
  json.scenes[0].nodes = [hipsIdx, meshesIdx, secondaryIdx]

  // Point every skin that declares a skeleton root at the new hips root.
  for (const skin of json.skins) {
    if (skin.skeleton !== undefined) skin.skeleton = hipsIdx
  }
}
241299
242300function injectVRMExtension ( json : any ) : void {
@@ -396,6 +454,16 @@ export async function exportVRM(scene: Scene): Promise<Blob> {
396454 }
397455 }
398456
457+ // Map of bone name → parent bone name (using Babylon's bone hierarchy, which
458+ // is the source of truth for absolute transforms). rebakeBindPose uses this
459+ // to compute parent-relative translations in canonical form.
460+ const boneParentNameByName = new Map < string , string | null > ( )
461+ for ( const snap of boneSnapshots ) {
462+ if ( boneParentNameByName . has ( snap . bone . name ) ) continue
463+ const parent = snap . bone . getParent ( )
464+ boneParentNameByName . set ( snap . bone . name , parent ? parent . name : null )
465+ }
466+
399467 try {
400468 const glbData = await GLTF2Export . GLBAsync ( scene , 'avatar' , {
401469 shouldExportNode : ( node ) => {
@@ -410,9 +478,9 @@ export async function exportVRM(scene: Scene): Promise<Blob> {
410478 const buffer = await glbBlob . arrayBuffer ( )
411479 const { json, binChunk } = readGLBChunks ( buffer )
412480
413- rebakeBindPose ( json , binChunk , snapshotByName )
481+ rebakeBindPose ( json , binChunk , snapshotByName , boneParentNameByName )
414482 mergeSkeletons ( json )
415- applyOrientationFix ( json )
483+ restructureForVrm ( json )
416484 injectVRMExtension ( json )
417485
418486 return new Blob ( [ packGLB ( json , binChunk ) ] , { type : 'application/octet-stream' } )
0 commit comments