Skip to content

Commit e611a8d

Browse files
author
Kerkesni
committed
Merge remote-tracking branch 'origin/bugfix/ARSN-502' into w/8.2/bugfix/ARSN-502
2 parents 190210e + dd9e980 commit e611a8d

File tree

6 files changed

+224
-131
lines changed

6 files changed

+224
-131
lines changed

lib/storage/metadata/MetadataWrapper.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class MetadataWrapper {
117117
replicaSet: params.mongodb.replicaSet,
118118
readPreference: params.mongodb.readPreference,
119119
database: params.mongodb.database,
120+
instanceId: params.instanceId,
120121
replicationGroupId: params.replicationGroupId,
121122
path: params.mongodb.path,
122123
authCredentials: params.mongodb.authCredentials,

lib/storage/metadata/mongoclient/MongoClientInterface.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import {
3737
} from 'mongodb';
3838
import { v4 as uuidv4 } from 'uuid';
3939

40-
import { generateUniqueVersionId } from '../../../versioning/VersionID';
40+
import { generateVersionId } from '../../../versioning/VersionID';
4141
import * as listAlgos from '../../../algos/list/exportAlgos';
4242
import LRUCache from '../../../algos/cache/LRUCache';
4343

@@ -98,6 +98,7 @@ export type MongoDBClientInterfaceParameters = {
9898
path: string,
9999
database: string,
100100
logger: werelogs.Logger,
101+
instanceId: string,
101102
replicationGroupId: string,
102103
authCredentials: MongoUtils.AuthCredentials,
103104
isLocationTransient: Function,
@@ -245,6 +246,7 @@ class MongoClientInterface {
245246
private client: MongoClient | null;
246247
private db: Db | null;
247248
private path: string;
249+
private instanceId: string;
248250
private replicationGroupId: string;
249251
private database: string;
250252
private isLocationTransient: Function;
@@ -261,7 +263,7 @@ class MongoClientInterface {
261263

262264
constructor(params: MongoDBClientInterfaceParameters) {
263265
const { replicaSetHosts, writeConcern, replicaSet, readPreference, path,
264-
database, logger, replicationGroupId, authCredentials,
266+
database, logger, instanceId, replicationGroupId, authCredentials,
265267
isLocationTransient, shardCollections } = params;
266268
const cred = MongoUtils.credPrefix(authCredentials);
267269
this.mongoUrl = `mongodb://${cred}${replicaSetHosts}/` +
@@ -276,6 +278,7 @@ class MongoClientInterface {
276278
this.adminDb = null;
277279
this.logger = logger;
278280
this.path = path;
281+
this.instanceId = instanceId;
279282
this.replicationGroupId = replicationGroupId;
280283
this.database = database;
281284
this.isLocationTransient = isLocationTransient;
@@ -837,7 +840,7 @@ class MongoClientInterface {
837840
cb: ArsenalCallback<string>,
838841
isRetry?: boolean,
839842
) {
840-
const versionId = generateUniqueVersionId(this.replicationGroupId);
843+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
841844
objVal.versionId = versionId;
842845
const versionKey = formatVersionKey(objName, versionId, params.vFormat);
843846
const masterKey = formatMasterKey(objName, params.vFormat);
@@ -961,7 +964,7 @@ class MongoClientInterface {
961964
log: werelogs.Logger,
962965
cb: ArsenalCallback<string>,
963966
) {
964-
const versionId = generateUniqueVersionId(this.replicationGroupId);
967+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
965968
objVal.versionId = versionId;
966969
const masterKey = formatMasterKey(objName, params.vFormat);
967970
c.updateOne({ _id: masterKey },
@@ -1792,7 +1795,7 @@ class MongoClientInterface {
17921795
) {
17931796
const masterKey = formatMasterKey(objName, params.vFormat);
17941797
const versionKey = formatVersionKey(objName, params.versionId, params.vFormat);
1795-
const _vid = generateUniqueVersionId(this.replicationGroupId);
1798+
const _vid = generateVersionId(this.instanceId, this.replicationGroupId);
17961799
async.series([
17971800
next => c.updateOne(
17981801
{

lib/versioning/VersionID.ts

Lines changed: 95 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,69 @@
1-
// VersionID format:
1+
// Hex VersionID format:
22
// timestamp sequential_position rep_group_id other_information
33
// where:
44
// - timestamp 14 bytes epoch in ms (good until 5138)
55
// - sequential_position 06 bytes position in the ms slot (1B ops)
66
// - rep_group_id 07 bytes replication group identifier
77
// - other_information arbitrary user input, such as a unique string
8+
//
9+
// Legacy Base62 VersionID:
10+
// timestamp sequential_position rep_group_id
11+
// where:
12+
// - timestamp 14 bytes epoch in ms
13+
// - sequential_position 06 bytes position in the ms slot
14+
// - rep_group_id 07 bytes replication group identifier
15+
//
16+
// Base62 VersionID:
17+
// timestamp sequential_position rep_group_id instance_id version_id_format
18+
// where:
19+
// - timestamp 14 bytes epoch in ms
20+
// - sequential_position 06 bytes position in the ms slot
21+
// - rep_group_id 07 bytes replication group identifier
22+
// - instance_id 06 bytes unique instance identifier
23+
// - version_id_format 02 bytes version ID format marker + version
824

925
import base62Integer from 'base62';
1026
import baseX from 'base-x';
27+
import assert from 'assert';
28+
import { VersioningConstants } from './constants';
1129
const BASE62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
1230
const base62String = baseX(BASE62);
1331

1432
// the lengths of the components in bytes
1533
const LENGTH_TS = 14; // timestamp: epoch in ms
1634
const LENGTH_SEQ = 6; // position in ms slot
1735
const LENGTH_RG = 7; // replication group id
36+
const LENGTH_ID = 6; // instance id
37+
const LENGTH_FT = 2; // version ID format, 1 byte + separator
1838

1939
// empty string template for the variables in a versionId
2040
const TEMPLATE_TS = new Array(LENGTH_TS + 1).join('0');
2141
const TEMPLATE_SEQ = new Array(LENGTH_SEQ + 1).join('0');
2242
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join(' ');
43+
const TEMPLATE_ID = new Array(LENGTH_ID + 1).join('0');
2344

2445
export const S3_VERSION_ID_ENCODING_TYPE = process.env.S3_VERSION_ID_ENCODING_TYPE;
2546

26-
// Counter that is increased after each call to generateUniqueVersionId
27-
export let uidCounter = 0;
28-
export const versionIdSeed = getVersionIdSeed();
47+
// Flag to enable the new version ID (35 characters) over legacy shortID format (27 characters).
48+
// When enabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
49+
// - Uses new format: timestamp + sequential_position + rep_group_id + instance_id + version_id_format
50+
// - Includes instance_id field to differentiate version IDs across multiple instances in the same k8s cluster
51+
// - Appends format marker and version identifier for format detection
52+
// When disabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
53+
// - Uses old format: timestamp + sequential_position + rep_group_id (legacy 27-char format)
54+
// Falls back to hex encoding if S3_VERSION_ID_ENCODING_TYPE is 'hex' or unset
55+
export const ENABLE_FORMATTED_VERSION_ID =
56+
process.env.ENABLE_FORMATTED_VERSION_ID === 'true' ||
57+
process.env.ENABLE_FORMATTED_VERSION_ID === '1';
58+
59+
// version ID format added to the end of the version ID
60+
const VERSION_ID_FORMAT_VERSION = '1';
61+
const VERSION_ID_FORMAT_SUFFIX = `${VersioningConstants.VersionId.FormatMarker}${VERSION_ID_FORMAT_VERSION}`;
62+
assert(VERSION_ID_FORMAT_SUFFIX.length === LENGTH_FT, `versionID format must be ${LENGTH_FT} bytes`);
63+
64+
const LEGACY_BASE62_DECODED_LENGTH = 27;
65+
const BASE62_DECODED_LENGTH = 35;
66+
const BASE62_ENCODED_LENGTH = 32;
2967

3068
/**
3169
* Left-pad a string representation of a value with a given template.
@@ -91,23 +129,6 @@ function wait(span: number) {
91129
}
92130
}
93131

94-
export function getVersionIdSeed(): string {
95-
// The HOSTNAME environment variable is set by default by Kubernetes
96-
// and populated with the pod name, containing a suffix with a unique id
97-
// as a string.
98-
// By default, we rely on the pid, to account for multiple workers in
99-
// cluster mode. As a result, the unique id is either <pod-suffix>.<pid>
100-
// or <pid>.
101-
// If unique vID are needed in a multi cluster mode architecture (i.e.,
102-
// multiple server instances, each with multiple workers), the
103-
// HOSTNAME environment variable can be set.
104-
return `${process.env.HOSTNAME?.split('-').pop() || ''}${process.pid}`;
105-
}
106-
107-
export function generateUniqueVersionId(replicationGroupId: string): string {
108-
return generateVersionId(`${versionIdSeed}.${uidCounter++}`, replicationGroupId);
109-
}
110-
111132
/**
112133
* This function returns a "versionId" string indicating the current time as a
113134
* combination of the current time in millisecond, the position of the request
@@ -124,6 +145,20 @@ export function generateVersionId(info: string, replicationGroupId: string): str
124145
// replication group ID, like PARIS; will be trimmed if exceed LENGTH_RG
125146
const repGroupId = padRight(replicationGroupId, TEMPLATE_RG);
126147

148+
let otherInfo = '';
149+
let instanceIdPadded = '';
150+
let formatSuffix = '';
151+
152+
if (!S3_VERSION_ID_ENCODING_TYPE || S3_VERSION_ID_ENCODING_TYPE === 'hex') {
153+
// In HEX encoding, the full info data is used.
154+
otherInfo = info;
155+
} else if (ENABLE_FORMATTED_VERSION_ID) {
156+
// In base62, info is for the instance ID and is trimmed/padded.
157+
instanceIdPadded = padRight(info, TEMPLATE_ID);
158+
// Add the version ID format marker and version.
159+
formatSuffix = VERSION_ID_FORMAT_SUFFIX;
160+
}
161+
127162
// Need to wait for the millisecond slot got "flushed". We wait for
128163
// only a single millisecond when the module is restarted, which is
129164
// necessary for the correctness of the system. This is therefore cheap.
@@ -143,13 +178,6 @@ export function generateVersionId(info: string, replicationGroupId: string): str
143178
lastSeq = lastTimestamp === ts ? lastSeq + 1 : 0;
144179
lastTimestamp = ts;
145180

146-
// if S3_VERSION_ID_ENCODING_TYPE is "hex", info is used.
147-
if (S3_VERSION_ID_ENCODING_TYPE === 'hex' || !S3_VERSION_ID_ENCODING_TYPE) {
148-
// info field stays as is
149-
} else {
150-
info = '';
151-
}
152-
153181
// In the default cases, we reverse the chronological order of the
154182
// timestamps so that all versions of an object can be retrieved in the
155183
// reversed chronological order---newest versions first. This is because of
@@ -158,7 +186,9 @@ export function generateVersionId(info: string, replicationGroupId: string): str
158186
padLeft(MAX_TS - lastTimestamp, TEMPLATE_TS) +
159187
padLeft(MAX_SEQ - lastSeq, TEMPLATE_SEQ) +
160188
repGroupId +
161-
info
189+
otherInfo +
190+
instanceIdPadded +
191+
formatSuffix
162192
);
163193
}
164194

@@ -271,6 +301,30 @@ export function base62Decode(str: string): string | Error {
271301
export const ENC_TYPE_HEX = 0; // legacy (large) encoding
272302
export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
273303

304+
/**
305+
* Checks if the given versionId string contains the specified format version.
306+
*
307+
* @param versionId - The versionId string to check.
308+
* @param version - The expected format version.
309+
* @returns true if the versionId contains the format marker and version, false otherwise.
310+
*/
311+
function hasVersionIDFormat(versionId: string, version: string): boolean {
312+
// Format marker can only exist after the required versionId sections.
313+
// This check removes the risk of looking for the format marker in the
314+
// replication group ID, which can technically contain any character as
315+
// it's set by the end user.
316+
if (versionId.length < LENGTH_TS + LENGTH_SEQ + LENGTH_RG + LENGTH_FT) {
317+
return false; // Not enough characters for format marker
318+
}
319+
// For constant time lookup, we always assume that the format marker is
320+
// at the end of the versionId.
321+
const formatMarkerIdx = versionId.length - LENGTH_FT;
322+
if (versionId.charAt(formatMarkerIdx) !== VersioningConstants.VersionId.FormatMarker) {
323+
return false; // no format marker
324+
}
325+
return versionId.substring(formatMarkerIdx + 1) === version; // check if the version matches
326+
}
327+
274328
/**
275329
* Encode a versionId to obscure internal information contained
276330
* in a version ID.
@@ -279,8 +333,9 @@ export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
279333
* @return - the encoded versionId
280334
*/
281335
export function encode(str: string): string {
282-
// default format without 'info' field will always be 27 characters
283-
if (str.length === 27) {
336+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
337+
// The new base62 format is 35 characters and includes the format marker at the end.
338+
if (str.length === LEGACY_BASE62_DECODED_LENGTH || hasVersionIDFormat(str, VERSION_ID_FORMAT_VERSION)) {
284339
return base62Encode(str);
285340
} // legacy format
286341
return hexEncode(str);
@@ -296,15 +351,20 @@ export function encode(str: string): string {
296351
*/
297352
export function decode(str: string): string | Error {
298353
// default format is exactly 32 characters when encoded
299-
if (str.length === 32) {
354+
if (str.length === BASE62_ENCODED_LENGTH) {
300355
const decoded: string | Error = base62Decode(str);
301-
if (typeof decoded === 'string' && decoded.length !== 27) {
302-
return new Error(`decoded ${str} is not length 27`);
356+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
357+
// The new base62 format is always 35 characters long.
358+
if (typeof decoded === 'string' &&
359+
decoded.length !== LEGACY_BASE62_DECODED_LENGTH &&
360+
decoded.length !== BASE62_DECODED_LENGTH) {
361+
return new Error(`decoded ${str} is not length ` +
362+
`${LEGACY_BASE62_DECODED_LENGTH} or ${BASE62_DECODED_LENGTH}`);
303363
}
304364
return decoded;
305365
}
306366
// legacy format
307-
if (str.length > 32) {
367+
if (str.length > BASE62_ENCODED_LENGTH) {
308368
return hexDecode(str);
309369
}
310370
return new Error(`cannot decode str ${str.length}`);

lib/versioning/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export enum BucketVersioningFormat {
99
export const VersioningConstants = {
1010
VersionId: {
1111
Separator: '\0',
12+
FormatMarker: '?',
1213
},
1314
DbPrefixes: {
1415
Master: '\x7fM',

tests/functional/metadata/mongodb/putObject.spec.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
368368
};
369369

370370
// simulate a versionId collision by always generating the same versionId
371-
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
371+
const genVID = sinon.stub(VersionID, 'generateVersionId')
372372
.returns('test-version-id');
373373

374374
async.series([
@@ -382,7 +382,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
382382
);
383383
// make sure the retry triggered on the first collision detection
384384
assert(genVID.calledThrice,
385-
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
385+
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
386386
done();
387387
});
388388
});
@@ -398,7 +398,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
398398
};
399399

400400
// simulate a versionId collision by always generating the same versionId
401-
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
401+
const genVID = sinon.stub(VersionID, 'generateVersionId')
402402
.onFirstCall().returns('test-version-id')
403403
.onSecondCall().returns('test-version-id') // trigger collision
404404
.onThirdCall().returns('test-version-id-retry'); // change versionId on retry
@@ -412,7 +412,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
412412
assert.ifError(err, `expected no error, got ${err}`);
413413
// make sure the retry triggered on the first collision detection
414414
assert(genVID.calledThrice,
415-
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
415+
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
416416
// make sure the last call returned a different versionId
417417
const vid1 = JSON.parse(res[0]).versionId;
418418
const vid2 = JSON.parse(res[1]).versionId;

0 commit comments

Comments
 (0)