Skip to content

Commit ebc875d

Browse files
authored
Kg vectorize support (#38)
* embeddingApiKey support * added new vectorize parameters support * updated api report * added a couple small tests * added some vectorize tests * updated env.example * linting fixes * majorly updated vectorize tests * fixed couple breaking tests * updated build report
1 parent 09675fd commit ebc875d

25 files changed

+439
-116
lines changed

.env.example

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
1+
################################################################################
2+
# READ THE DEVGUIDE.MD FILE FOR MORE INFORMATION ON HOW TO CONFIGURE THIS FILE #
3+
################################################################################
4+
15
# Astra API endpoint
26
ASTRA_URI=https://<db_id>-<region>.apps.astra.datastax.com
37

48
# Application token, used to authenticate with the Astra API
59
APPLICATION_TOKEN=AstraCS:<rest_of_token>
610

711
# Set this to some value to enable running tests that require a $vectorize enabled environment
8-
ASTRA_RUN_VECTORIZE_TESTS=
12+
ASTRA_RUN_VECTORIZE_TESTS=1
913

1014
# Set this to some value to enable running long-running tests
11-
ASTRA_RUN_LONG_TESTS=
15+
ASTRA_RUN_LONG_TESTS=1
1216

1317
# Set this to some value to enable running admin tests
1418
ASTRA_RUN_ADMIN_TESTS=

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,5 @@ build.zip
136136

137137
temp
138138
tsdoc-metadata.json
139+
140+
tests/vectorize_credentials.json

DEVGUIDE.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,33 @@ If a new tag really, really, needs to be added, it can be done by adding a new e
7272
format, and updating the `assertTestsEnabled` function. However, this should be done sparingly, as it can make the
7373
test suite harder to manage.
7474

75+
### Running vectorize tests
76+
To run vectorize tests, you need to have a vectorize-enabled kube running, with the correct tags enabled.
77+
You must create a file, `tests/vectorize_credentials.json`, with the following format:
78+
79+
```ts
80+
interface Config {
81+
[providerName: string]: {
82+
apiKey?: string,
83+
providerKey?: string,
84+
parameters?: {
85+
[modelName: string]: Record<string, string>
86+
},
87+
}
88+
}
89+
```
90+
91+
where:
92+
- `providerName` is the name of the provider (e.g. `nvidia`, `openai`, etc.) as found in `findEmbeddingProviders`
93+
- `apiKey` is the API key for the provider (which will be passed in through the header)
94+
- optional if no header auth test wanted
95+
- `providerKey` is the provider key for the provider (which will be passed in at collection creation)
96+
- optional if no KMS auth test wanted
97+
- `parameters` is a mapping of model names to their corresponding parameters
98+
- optional; only needed for providers whose models require extra parameters. `azureOpenAI`, for example, will need this.
99+
100+
This file is gitignored by default and will not be checked into VCS.
101+
75102
### Coverage testing
76103

77104
To run coverage testing, run the following command:

etc/astra-db-ts.api.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,10 @@ export type Caller = [name: string, version?: string];
175175
// @public
176176
export class Collection<Schema extends SomeDoc = SomeDoc> {
177177
// Warning: (ae-forgotten-export) The symbol "DataAPIHttpClient" needs to be exported by the entry point index.d.ts
178+
// Warning: (ae-forgotten-export) The symbol "CollectionSpawnOptions" needs to be exported by the entry point index.d.ts
178179
//
179180
// @internal
180-
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined);
181+
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined);
181182
bulkWrite(operations: AnyBulkWriteOperation<Schema>[], options?: BulkWriteOptions): Promise<BulkWriteResult<Schema>>;
182183
readonly collectionName: string;
183184
countDocuments(filter: Filter<Schema>, upperBound: number, options?: WithTimeout): Promise<number>;
@@ -287,7 +288,7 @@ export interface CostInfo {
287288
}
288289

289290
// @public
290-
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, WithNamespace {
291+
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, CollectionSpawnOptions {
291292
checkExists?: boolean;
292293
}
293294

@@ -482,7 +483,7 @@ export class Db {
482483
// @internal
483484
constructor(endpoint: string, options: InternalRootClientOpts);
484485
admin(options?: AdminSpawnOptions): AstraDbAdmin;
485-
collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: WithNamespace): Collection<Schema>;
486+
collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: CollectionSpawnOptions): Collection<Schema>;
486487
collections(options?: WithNamespace & WithTimeout): Promise<Collection[]>;
487488
command(command: Record<string, any>, options?: RunCommandOptions): Promise<RawDataAPIResponse>;
488489
createCollection<Schema extends SomeDoc = SomeDoc>(collectionName: string, options?: CreateCollectionOptions<Schema>): Promise<Collection<Schema>>;
@@ -962,8 +963,9 @@ export interface ReplaceOneOptions extends WithTimeout {
962963
export type ReplaceOneResult<Schema extends SomeDoc> = InternalUpdateResult<Schema, 0 | 1>;
963964

964965
// @public
965-
export interface RunCommandOptions extends WithNamespace, WithTimeout {
966+
export interface RunCommandOptions extends WithTimeout {
966967
collection?: string;
968+
namespace?: string | null;
967969
}
968970

969971
// @public
@@ -1170,7 +1172,9 @@ export interface VectorDoc {
11701172

11711173
// @alpha
11721174
export interface VectorizeServiceOptions {
1175+
authentication?: Record<string, string | undefined>;
11731176
modelName: string;
1177+
parameters?: Record<string, unknown>;
11741178
provider: string;
11751179
}
11761180

src/api/clients/data-api-http-client.ts

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import {
1717
DEFAULT_DATA_API_AUTH_HEADER,
18+
DEFAULT_EMBEDDING_API_KEY_HEADER,
1819
DEFAULT_NAMESPACE,
1920
DEFAULT_TIMEOUT,
2021
hrTimeMs,
@@ -23,7 +24,7 @@ import {
2324
HttpMethods,
2425
RawDataAPIResponse,
2526
} from '@/src/api';
26-
import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID, WithNamespace } from '@/src/data-api';
27+
import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID } from '@/src/data-api';
2728
import { TimeoutManager, TimeoutOptions } from '@/src/api/timeout-managers';
2829
import { CommandFailedEvent, CommandStartedEvent, CommandSucceededEvent } from '@/src/data-api/events';
2930
import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } from '@/src/data-api/errors';
@@ -32,16 +33,20 @@ import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } fr
3233
* @internal
3334
*/
3435
export interface DataAPIRequestInfo {
35-
url: string;
36-
collection?: string;
37-
namespace?: string;
38-
command: Record<string, any>;
39-
timeoutManager: TimeoutManager;
36+
url: string,
37+
collection?: string,
38+
namespace?: string | null,
39+
command: Record<string, any>,
40+
timeoutManager: TimeoutManager,
4041
}
4142

4243
interface ExecuteCommandOptions {
43-
collection?: string;
44-
namespace?: string;
44+
namespace?: string | null,
45+
collection?: string,
46+
}
47+
48+
interface DataAPIHttpClientOptions extends HTTPClientOptions {
49+
namespace: string | undefined,
4550
}
4651

4752
/**
@@ -50,16 +55,16 @@ interface ExecuteCommandOptions {
5055
export class DataAPIHttpClient extends HttpClient {
5156
public collection?: string;
5257
public namespace?: string;
53-
readonly #props: HTTPClientOptions & WithNamespace;
58+
readonly #props: DataAPIHttpClientOptions;
5459

55-
constructor(props: HTTPClientOptions & WithNamespace) {
56-
super(props, mkAuthHeader);
60+
constructor(props: DataAPIHttpClientOptions, embeddingApiKey?: string) {
61+
super(props, mkHeaders(embeddingApiKey));
5762
this.namespace = props.namespace;
5863
this.#props = props;
5964
}
6065

61-
public withCollection(namespace: string, collection: string): DataAPIHttpClient {
62-
const clone = new DataAPIHttpClient(this.#props);
66+
public forCollection(namespace: string, collection: string, embeddingApiKey: string | undefined): DataAPIHttpClient {
67+
const clone = new DataAPIHttpClient(this.#props, embeddingApiKey);
6368
clone.collection = collection;
6469
clone.namespace = namespace;
6570
return clone;
@@ -86,9 +91,12 @@ export class DataAPIHttpClient extends HttpClient {
8691

8792
try {
8893
info.collection ||= this.collection;
89-
info.namespace ||= this.namespace || DEFAULT_NAMESPACE;
9094

91-
const keyspacePath = `/${info.namespace}`;
95+
if (info.namespace !== null) {
96+
info.namespace ||= this.namespace || DEFAULT_NAMESPACE;
97+
}
98+
99+
const keyspacePath = info.namespace ? `/${info.namespace}` : '';
92100
const collectionPath = info.collection ? `/${info.collection}` : '';
93101
info.url += keyspacePath + collectionPath;
94102

@@ -117,7 +125,7 @@ export class DataAPIHttpClient extends HttpClient {
117125

118126
if (data.errors && data?.errors?.length > 0 && data?.errors[0]?.errorCode === 'COLLECTION_NOT_EXIST') {
119127
const name = data?.errors[0]?.message.split(': ')[1];
120-
throw new CollectionNotFoundError(info.namespace, name);
128+
throw new CollectionNotFoundError(info.namespace!, name);
121129
}
122130

123131
if (data?.errors && data?.errors.length > 0) {
@@ -193,6 +201,15 @@ export function reviver(_: string, value: any): any {
193201
return value;
194202
}
195203

196-
function mkAuthHeader(token: string): Record<string, any> {
197-
return { [DEFAULT_DATA_API_AUTH_HEADER]: token };
204+
function mkHeaders(embeddingApiKey: string | undefined) {
205+
if (embeddingApiKey) {
206+
return (token: string) => ({
207+
[DEFAULT_EMBEDDING_API_KEY_HEADER]: embeddingApiKey,
208+
[DEFAULT_DATA_API_AUTH_HEADER]: token,
209+
});
210+
} else {
211+
return (token: string) => ({
212+
[DEFAULT_DATA_API_AUTH_HEADER]: token,
213+
});
214+
}
198215
}

src/api/clients/devops-api-http-client.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ interface DevopsAPIResponse {
5555
*/
5656
export class DevOpsAPIHttpClient extends HttpClient {
5757
constructor(opts: HTTPClientOptions) {
58-
super(opts, mkAuthHeader);
58+
super(opts, mkHeaders);
5959
}
6060

6161
public async request(req: DevOpsAPIRequestInfo, options: TimeoutOptions | undefined, started: number = 0): Promise<DevopsAPIResponse> {
@@ -187,6 +187,6 @@ export class DevOpsAPIHttpClient extends HttpClient {
187187
}
188188
}
189189

190-
function mkAuthHeader(token: string) {
190+
function mkHeaders(token: string) {
191191
return { [DEFAULT_DEVOPS_API_AUTH_HEADER]: `Bearer ${token}` };
192192
}

src/api/clients/http-client.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import { CLIENT_USER_AGENT, RAGSTACK_REQUESTED_WITH } from '@/src/api/constants'
1616
import { Caller, DataAPIClientEvents } from '@/src/client';
1717
import TypedEmitter from 'typed-emitter';
1818
import { FetchCtx, ResponseInfo } from '@/src/api/fetch/types';
19-
import { AuthHeaderFactory, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types';
19+
import { MkBaseHeaders, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types';
2020

2121
/**
2222
* @internal
@@ -29,7 +29,7 @@ export abstract class HttpClient {
2929
readonly #applicationToken: string;
3030
readonly baseHeaders: Record<string, any>;
3131

32-
protected constructor(options: HTTPClientOptions, mkAuthHeader: AuthHeaderFactory) {
32+
protected constructor(options: HTTPClientOptions, mkAuthHeader: MkBaseHeaders) {
3333
this.#applicationToken = options.applicationToken;
3434
this.baseUrl = options.baseUrl;
3535
this.emitter = options.emitter;

src/api/clients/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export interface HTTPClientOptions {
1818
/**
1919
* @internal
2020
*/
21-
export type AuthHeaderFactory = (token: string) => Record<string, any>;
21+
export type MkBaseHeaders = (token: string) => Record<string, any>;
2222

2323
/**
2424
* @internal

src/api/constants.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ export const DEFAULT_NAMESPACE = 'default_keyspace';
5353
*/
5454
export const DEFAULT_TIMEOUT = 30000;
5555

56+
/**
57+
* @internal
58+
*/
59+
export const DEFAULT_EMBEDDING_API_KEY_HEADER = 'x-embedding-api-key';
60+
5661
/**
5762
* @internal
5863
*/

src/data-api/collection.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ import { FindOneAndDeleteCommand } from '@/src/data-api/types/find/find-one-dele
7575
import { FindOneAndUpdateCommand } from '@/src/data-api/types/find/find-one-update';
7676
import { InsertManyCommand } from '@/src/data-api/types/insert/insert-many';
7777
import { Mutable } from '@/src/data-api/types/utils';
78+
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';
7879

7980
/**
8081
* Represents the interface to a collection in the database.
@@ -118,19 +119,19 @@ export class Collection<Schema extends SomeDoc = SomeDoc> {
118119
*
119120
* @internal
120121
*/
121-
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined) {
122+
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined) {
122123
Object.defineProperty(this, 'collectionName', {
123124
value: name,
124125
writable: false,
125126
});
126127

127128
Object.defineProperty(this, 'namespace', {
128-
value: namespace ?? db.namespace,
129+
value: opts?.namespace ?? db.namespace,
129130
writable: false,
130131
});
131132

132133
Object.defineProperty(this, '_httpClient', {
133-
value: httpClient.withCollection(this.namespace, this.collectionName),
134+
value: httpClient.forCollection(this.namespace, this.collectionName, opts?.embeddingApiKey),
134135
enumerable: false,
135136
});
136137

src/data-api/db.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import { extractDbIdFromUrl, validateOption } from '@/src/data-api/utils';
3030
import { CreateCollectionCommand } from '@/src/data-api/types/collections/create-collection';
3131
import { ListCollectionsCommand } from '@/src/data-api/types/collections/list-collection';
3232
import { InternalRootClientOpts } from '@/src/client/types';
33+
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';
3334

3435
/**
3536
* Represents an interface to some Astra database instance. This is the entrypoint for database-level DML, such as
@@ -244,8 +245,8 @@ export class Db {
244245
* @see SomeDoc
245246
* @see VectorDoc
246247
*/
247-
public collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: WithNamespace): Collection<Schema> {
248-
return new Collection<Schema>(this, this._httpClient, name, options?.namespace);
248+
public collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: CollectionSpawnOptions): Collection<Schema> {
249+
return new Collection<Schema>(this, this._httpClient, name, options);
249250
}
250251

251252
/**

src/data-api/types/collections/collections-common.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ export interface VectorizeServiceOptions {
6262
* @alpha
6363
*/
6464
modelName: string,
65+
/**
66+
* NOTE: This feature is under current development.
67+
*
68+
* @alpha
69+
*/
70+
authentication?: Record<string, string | undefined>,
71+
/**
72+
* NOTE: This feature is under current development.
73+
*
74+
* @alpha
75+
*/
76+
parameters?: Record<string, unknown>,
6577
}
6678

6779
/**

src/data-api/types/collections/command.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import { WithNamespace } from '@/src/data-api';
1615
import { WithTimeout } from '@/src/common';
1716

1817
/**
@@ -25,9 +24,13 @@ import { WithTimeout } from '@/src/common';
2524
*
2625
* @public
2726
*/
28-
export interface RunCommandOptions extends WithNamespace, WithTimeout {
27+
export interface RunCommandOptions extends WithTimeout {
2928
/**
3029
* The collection to run the command on. If not provided, the command is run on the database.
3130
*/
32-
collection?: string
31+
collection?: string,
32+
/**
33+
* The namespace (aka keyspace) to use for the db operation.
34+
*/
35+
namespace?: string | null,
3336
}

src/data-api/types/collections/create-collection.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import { SomeDoc, WithNamespace } from '@/src/data-api';
15+
import { SomeDoc } from '@/src/data-api';
1616
import { CollectionOptions } from '@/src/data-api/types';
1717
import { WithTimeout } from '@/src/common/types';
18+
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';
1819

1920
/** @internal */
2021
export interface CreateCollectionCommand {
@@ -38,7 +39,7 @@ export interface CreateCollectionCommand {
3839
*
3940
* @public
4041
*/
41-
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, WithNamespace {
42+
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, CollectionSpawnOptions {
4243
/**
4344
* If `true` or unset, runs an additional existence check before creating the collection, failing if the collection
4445
* with the same name already exists, raising a {@link CollectionAlreadyExistsError}.

0 commit comments

Comments
 (0)