@@ -263,6 +263,109 @@ func TestMetadataMigrationPreservesJSONDatatypeForUserDocs(t *testing.T) {
263263 assert .NotContains (t , resp .Body .String (), "binary datatype" , "the migrated user doc must not be Binary-tagged on primary" )
264264}
265265
266+ // TestMetadataMigrationSkipsPrimaryWriteWhenUpdatedUserAlreadyMigrated pins the
267+ // in-flight-update / stale-fallback-shadow race through the real auth path:
268+ //
269+ // 1. A user is created in legacy mode, so the user doc lands on the eventual fallback
270+ // collection (_default._default).
271+ // 2. The opt-in is flipped, switching the MetadataStore to the dual-collection wrapper.
272+ // At this point fallback still holds the original user body; primary is empty.
273+ // 3. The user is updated via the admin API *before* the migration runs.
274+ // `auth.Authenticator.Update` → `MetadataStore.Update` is read-through-fallback /
275+ // write-to-primary, so the fresh body lands on primary while the stale original
276+ // stays on fallback as a shadow.
277+ // 4. Migration runs and walks the fallback. For the user key, moveFallbackDoc's
278+ // Primary().Add returns (added=false, err=nil) because primary already holds the
279+ // fresher copy. That outcome MUST be treated as success — not an error — and the
280+ // stale fallback shadow MUST be deleted, leaving the fresh primary copy intact.
281+ //
282+ // What this would catch:
283+ // - moveFallbackDoc surfacing the already-exists Add as a stats.Errors increment.
284+ // - A naive migration that overwrote primary with the stale fallback bytes.
285+ // - A migration that skipped the fallback delete on the already-exists branch, leaving
286+ // a stale shadow that would resurrect on the next non-xattr read.
287+ func TestMetadataMigrationSkipsPrimaryWriteWhenUpdatedUserAlreadyMigrated (t * testing.T ) {
288+ rt := rest .NewRestTesterPersistentConfigNoDB (t )
289+ defer rt .Close ()
290+
291+ // 1. Legacy mode: user write lands on _default._default.
292+ dbConfig := rt .NewDbConfig ()
293+ dbConfig .UseSystemMobileMetadataCollection = base .Ptr (false )
294+ resp := rt .CreateDatabase ("db" , dbConfig )
295+ rest .RequireStatus (t , resp , http .StatusCreated )
296+
297+ const originalUser = `{"name":"alice","password":"letmein","admin_channels":["public"]}`
298+ resp = rt .SendAdminRequest (http .MethodPut , "/{{.db}}/_user/alice" , originalUser )
299+ rest .RequireStatus (t , resp , http .StatusCreated )
300+
301+ // 2. Flip the opt-in: MetadataStore becomes the dual-collection wrapper. The user
302+ // doc lives on fallback (_default._default); primary (_system._mobile) is empty.
303+ dbConfig .UseSystemMobileMetadataCollection = base .Ptr (true )
304+ resp = rt .UpsertDbConfig ("db" , dbConfig )
305+ rest .RequireStatus (t , resp , http .StatusCreated )
306+
307+ ms , ok := rt .GetDatabase ().MetadataStore .(* base.MetadataStore )
308+ require .True (t , ok , "after opt-in the MetadataStore must be the dual-collection wrapper" )
309+ userKey := rt .GetDatabase ().MetadataKeys .UserKey ("alice" )
310+
311+ // Sanity: before any update, the user body is only on fallback.
312+ _ , _ , err := ms .Fallback ().GetRaw (rt .Context (), userKey )
313+ require .NoError (t , err , "user doc must be on fallback before the in-flight update" )
314+ primaryExists , err := ms .Primary ().Exists (rt .Context (), userKey )
315+ require .NoError (t , err )
316+ require .False (t , primaryExists , "primary must be empty before the in-flight update" )
317+
318+ // 3. Update the user before the migration runs. Authenticator.Update goes through
319+ // MetadataStore.Update, which reads through fallback and writes the fresh body to
320+ // primary. After this the fallback still holds the original body as a stale shadow.
321+ const updatedUser = `{"name":"alice","admin_channels":["public","secrets"]}`
322+ resp = rt .SendAdminRequest (http .MethodPut , "/{{.db}}/_user/alice" , updatedUser )
323+ rest .RequireStatus (t , resp , http .StatusOK )
324+
325+ primaryBodyBeforeMigration , _ , err := ms .Primary ().GetRaw (rt .Context (), userKey )
326+ require .NoError (t , err , "primary must hold the post-update user body after the in-flight Update" )
327+ require .Contains (t , string (primaryBodyBeforeMigration ), `"secrets"` ,
328+ "primary should carry the updated admin_channels, fallback should still hold the original" )
329+ fallbackBodyBeforeMigration , _ , err := ms .Fallback ().GetRaw (rt .Context (), userKey )
330+ require .NoError (t , err , "stale fallback shadow must still exist before migration runs" )
331+ require .NotEqual (t , primaryBodyBeforeMigration , fallbackBodyBeforeMigration ,
332+ "primary fresh body and fallback stale shadow must structurally differ — otherwise this test isn't exercising the race" )
333+
334+ // 4. Drive the migration directly through the admin endpoint and assert it reaches
335+ // completed with zero failures — the already-exists Add MUST NOT be classified as
336+ // an error.
337+ resp = rt .SendAdminRequest (http .MethodPost , "/{{.db}}/_metadata_migration?action=start" , "" )
338+ rest .RequireStatus (t , resp , http .StatusOK )
339+
340+ require .EventuallyWithT (t , func (c * assert.CollectT ) {
341+ st := rt .SendAdminRequest (http .MethodGet , "/{{.db}}/_metadata_migration" , "" )
342+ body := st .Body .String ()
343+ assert .NotContains (c , body , `"status":"error"` , "migration must not error on the already-exists branch — payload: %s" , body )
344+ assert .Contains (c , body , `"status":"completed"` , "migration should reach completed — payload: %s" , body )
345+ assert .Contains (c , body , `"docs_failed":0` , "already-exists Add must not be counted as a failure — payload: %s" , body )
346+ }, 30 * time .Second , 200 * time .Millisecond )
347+
348+ // 5a. Primary must still hold the FRESH body — the migration must not have
349+ // overwritten it with the stale fallback shadow.
350+ primaryBodyAfterMigration , _ , err := ms .Primary ().GetRaw (rt .Context (), userKey )
351+ require .NoError (t , err , "primary must still hold the user post-migration" )
352+ assert .Equal (t , primaryBodyBeforeMigration , primaryBodyAfterMigration ,
353+ "primary body must be byte-identical to the pre-migration fresh write — a regression that overwrote with the stale fallback shadow would change this" )
354+
355+ // 5b. Fallback shadow must be gone — the already-exists short-circuit must still
356+ // run the cleanup delete. Leaving the shadow would resurrect the stale body on
357+ // any subsequent non-xattr read path.
358+ _ , _ , err = ms .Fallback ().GetRaw (rt .Context (), userKey )
359+ assert .True (t , base .IsDocNotFoundError (err ),
360+ "stale fallback shadow must be cleaned up even when primary already held a fresher copy — got err: %v" , err )
361+
362+ // 5c. End-to-end: the public auth view of the user is the fresh post-update shape.
363+ resp = rt .SendAdminRequest (http .MethodGet , "/{{.db}}/_user/alice" , "" )
364+ rest .RequireStatus (t , resp , http .StatusOK )
365+ assert .Contains (t , resp .Body .String (), `"secrets"` ,
366+ "GET /_user/alice must reflect the in-flight update — not the pre-update fallback shadow" )
367+ }
368+
266369// TestMetadataMigrationOptInIsIrreversibleViaREST verifies that, once a database has opted in
267370// to the system metadata collection, a config update over the REST API that attempts to disable
268371// the opt-in (set to false or omit it) is rejected.
0 commit comments