Skip to content

Commit

Permalink
Kg vectorize support (#38)
Browse files Browse the repository at this point in the history
* embeddingApiKey support

* added new vectorize parameters support

* updated api report

* added a couple small tests

* added some vectorize tests

* updated env.example

* linting fixes

* majorly updated vectorize tests

* fixed couple breaking tests

* updated build report
  • Loading branch information
toptobes authored May 22, 2024
1 parent 09675fd commit ebc875d
Show file tree
Hide file tree
Showing 25 changed files with 439 additions and 116 deletions.
8 changes: 6 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
################################################################################
# READ THE DEVGUIDE.MD FILE FOR MORE INFORMATION ON HOW TO CONFIGURE THIS FILE #
################################################################################

# Astra API endpoint
ASTRA_URI=https://<db_id>-<region>.apps.astra.datastax.com

# Application token, used to authenticate with the Astra API
APPLICATION_TOKEN=AstraCS:<rest_of_token>

# Set this to some value to enable running tests that require a $vectorize enabled environment
ASTRA_RUN_VECTORIZE_TESTS=
ASTRA_RUN_VECTORIZE_TESTS=1

# Set this to some value to enable running long-running tests
ASTRA_RUN_LONG_TESTS=
ASTRA_RUN_LONG_TESTS=1

# Set this to some value to enable running admin tests
ASTRA_RUN_ADMIN_TESTS=
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,5 @@ build.zip

temp
tsdoc-metadata.json

tests/vectorize_credentials.json
27 changes: 27 additions & 0 deletions DEVGUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,33 @@ If a new tag really, really, needs to be added, it can be done by adding a new e
format, and updating the `assertTestsEnabled` function. However, this should be done sparingly, as it can make the
test suite harder to manage.

### Running vectorize tests
To run vectorize tests, you need to have a vectorize-enabled kube running, with the correct tags enabled.
You must create a file, `tests/vectorize_credentials.json`, with the following format:

```ts
interface Config {
[providerName: string]: {
apiKey?: string,
providerKey?: string,
parameters?: {
[modelName: string]: Record<string, string>
},
}
}
```

where:
- `providerName` is the name of the provider (e.g. `nvidia`, `openai`, etc.) as found in `findEmbeddingProviders`
- `apiKey` is the API key for the provider (which will be passed in through the header)
  - optional; omit it if no header-auth test is wanted
- `providerKey` is the provider key for the provider (which will be passed in at collection creation)
  - optional; omit it if no KMS-auth test is wanted
- `parameters` is a mapping of model names to their corresponding parameters
  - optional; only needed when a model requires them (`azureOpenAI`, for example, will need this)

This file is gitignored by default and will not be checked into VCS.

### Coverage testing

To run coverage testing, run the following command:
Expand Down
12 changes: 8 additions & 4 deletions etc/astra-db-ts.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,10 @@ export type Caller = [name: string, version?: string];
// @public
export class Collection<Schema extends SomeDoc = SomeDoc> {
// Warning: (ae-forgotten-export) The symbol "DataAPIHttpClient" needs to be exported by the entry point index.d.ts
// Warning: (ae-forgotten-export) The symbol "CollectionSpawnOptions" needs to be exported by the entry point index.d.ts
//
// @internal
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined);
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined);
bulkWrite(operations: AnyBulkWriteOperation<Schema>[], options?: BulkWriteOptions): Promise<BulkWriteResult<Schema>>;
readonly collectionName: string;
countDocuments(filter: Filter<Schema>, upperBound: number, options?: WithTimeout): Promise<number>;
Expand Down Expand Up @@ -287,7 +288,7 @@ export interface CostInfo {
}

// @public
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, WithNamespace {
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, CollectionSpawnOptions {
checkExists?: boolean;
}

Expand Down Expand Up @@ -482,7 +483,7 @@ export class Db {
// @internal
constructor(endpoint: string, options: InternalRootClientOpts);
admin(options?: AdminSpawnOptions): AstraDbAdmin;
collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: WithNamespace): Collection<Schema>;
collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: CollectionSpawnOptions): Collection<Schema>;
collections(options?: WithNamespace & WithTimeout): Promise<Collection[]>;
command(command: Record<string, any>, options?: RunCommandOptions): Promise<RawDataAPIResponse>;
createCollection<Schema extends SomeDoc = SomeDoc>(collectionName: string, options?: CreateCollectionOptions<Schema>): Promise<Collection<Schema>>;
Expand Down Expand Up @@ -962,8 +963,9 @@ export interface ReplaceOneOptions extends WithTimeout {
export type ReplaceOneResult<Schema extends SomeDoc> = InternalUpdateResult<Schema, 0 | 1>;

// @public
export interface RunCommandOptions extends WithNamespace, WithTimeout {
export interface RunCommandOptions extends WithTimeout {
collection?: string;
namespace?: string | null;
}

// @public
Expand Down Expand Up @@ -1170,7 +1172,9 @@ export interface VectorDoc {

// @alpha
export interface VectorizeServiceOptions {
authentication?: Record<string, string | undefined>;
modelName: string;
parameters?: Record<string, unknown>;
provider: string;
}

Expand Down
53 changes: 35 additions & 18 deletions src/api/clients/data-api-http-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import {
DEFAULT_DATA_API_AUTH_HEADER,
DEFAULT_EMBEDDING_API_KEY_HEADER,
DEFAULT_NAMESPACE,
DEFAULT_TIMEOUT,
hrTimeMs,
Expand All @@ -23,7 +24,7 @@ import {
HttpMethods,
RawDataAPIResponse,
} from '@/src/api';
import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID, WithNamespace } from '@/src/data-api';
import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID } from '@/src/data-api';
import { TimeoutManager, TimeoutOptions } from '@/src/api/timeout-managers';
import { CommandFailedEvent, CommandStartedEvent, CommandSucceededEvent } from '@/src/data-api/events';
import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } from '@/src/data-api/errors';
Expand All @@ -32,16 +33,20 @@ import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } fr
* @internal
*/
export interface DataAPIRequestInfo {
url: string;
collection?: string;
namespace?: string;
command: Record<string, any>;
timeoutManager: TimeoutManager;
url: string,
collection?: string,
namespace?: string | null,
command: Record<string, any>,
timeoutManager: TimeoutManager,
}

interface ExecuteCommandOptions {
collection?: string;
namespace?: string;
namespace?: string | null,
collection?: string,
}

/**
 * Constructor options for `DataAPIHttpClient`: the base `HTTPClientOptions` plus the namespace the
 * client starts out with. The client keeps these options and reuses them when it clones itself
 * for a specific collection.
 */
interface DataAPIHttpClientOptions extends HTTPClientOptions {
// Assigned to the client's public `namespace` property at construction; may be `undefined`.
namespace: string | undefined,
}

/**
Expand All @@ -50,16 +55,16 @@ interface ExecuteCommandOptions {
export class DataAPIHttpClient extends HttpClient {
public collection?: string;
public namespace?: string;
readonly #props: HTTPClientOptions & WithNamespace;
readonly #props: DataAPIHttpClientOptions;

constructor(props: HTTPClientOptions & WithNamespace) {
super(props, mkAuthHeader);
constructor(props: DataAPIHttpClientOptions, embeddingApiKey?: string) {
super(props, mkHeaders(embeddingApiKey));
this.namespace = props.namespace;
this.#props = props;
}

public withCollection(namespace: string, collection: string): DataAPIHttpClient {
const clone = new DataAPIHttpClient(this.#props);
public forCollection(namespace: string, collection: string, embeddingApiKey: string | undefined): DataAPIHttpClient {
const clone = new DataAPIHttpClient(this.#props, embeddingApiKey);
clone.collection = collection;
clone.namespace = namespace;
return clone;
Expand All @@ -86,9 +91,12 @@ export class DataAPIHttpClient extends HttpClient {

try {
info.collection ||= this.collection;
info.namespace ||= this.namespace || DEFAULT_NAMESPACE;

const keyspacePath = `/${info.namespace}`;
if (info.namespace !== null) {
info.namespace ||= this.namespace || DEFAULT_NAMESPACE;
}

const keyspacePath = info.namespace ? `/${info.namespace}` : '';
const collectionPath = info.collection ? `/${info.collection}` : '';
info.url += keyspacePath + collectionPath;

Expand Down Expand Up @@ -117,7 +125,7 @@ export class DataAPIHttpClient extends HttpClient {

if (data.errors && data?.errors?.length > 0 && data?.errors[0]?.errorCode === 'COLLECTION_NOT_EXIST') {
const name = data?.errors[0]?.message.split(': ')[1];
throw new CollectionNotFoundError(info.namespace, name);
throw new CollectionNotFoundError(info.namespace!, name);
}

if (data?.errors && data?.errors.length > 0) {
Expand Down Expand Up @@ -193,6 +201,15 @@ export function reviver(_: string, value: any): any {
return value;
}

function mkAuthHeader(token: string): Record<string, any> {
return { [DEFAULT_DATA_API_AUTH_HEADER]: token };
/**
 * Creates the function that turns an application token into the base request headers.
 *
 * The produced headers always carry the token under the Data API auth header. When a non-empty
 * `embeddingApiKey` is given, it is additionally sent under the embedding API key header (listed
 * first, ahead of the auth header).
 *
 * @param embeddingApiKey - optional embedding provider API key; `undefined` or `''` means "none"
 *
 * @returns a `(token) => headers` factory
 */
function mkHeaders(embeddingApiKey: string | undefined) {
  // Auth header only: used when no embedding key was supplied.
  const authOnly = (token: string) => ({
    [DEFAULT_DATA_API_AUTH_HEADER]: token,
  });

  // Embedding key first, then the auth header.
  const authWithEmbeddingKey = (token: string) => ({
    [DEFAULT_EMBEDDING_API_KEY_HEADER]: embeddingApiKey,
    [DEFAULT_DATA_API_AUTH_HEADER]: token,
  });

  return embeddingApiKey ? authWithEmbeddingKey : authOnly;
}
4 changes: 2 additions & 2 deletions src/api/clients/devops-api-http-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ interface DevopsAPIResponse {
*/
export class DevOpsAPIHttpClient extends HttpClient {
constructor(opts: HTTPClientOptions) {
super(opts, mkAuthHeader);
super(opts, mkHeaders);
}

public async request(req: DevOpsAPIRequestInfo, options: TimeoutOptions | undefined, started: number = 0): Promise<DevopsAPIResponse> {
Expand Down Expand Up @@ -187,6 +187,6 @@ export class DevOpsAPIHttpClient extends HttpClient {
}
}

function mkAuthHeader(token: string) {
/**
 * Builds the base headers for DevOps API requests: the application token, prefixed with
 * `Bearer `, under the DevOps API auth header.
 *
 * @param token - the application token to authenticate with
 *
 * @returns a single-entry header object
 */
function mkHeaders(token: string) {
  const bearerValue = `Bearer ${token}`;
  return { [DEFAULT_DEVOPS_API_AUTH_HEADER]: bearerValue };
}
4 changes: 2 additions & 2 deletions src/api/clients/http-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { CLIENT_USER_AGENT, RAGSTACK_REQUESTED_WITH } from '@/src/api/constants'
import { Caller, DataAPIClientEvents } from '@/src/client';
import TypedEmitter from 'typed-emitter';
import { FetchCtx, ResponseInfo } from '@/src/api/fetch/types';
import { AuthHeaderFactory, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types';
import { MkBaseHeaders, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types';

/**
* @internal
Expand All @@ -29,7 +29,7 @@ export abstract class HttpClient {
readonly #applicationToken: string;
readonly baseHeaders: Record<string, any>;

protected constructor(options: HTTPClientOptions, mkAuthHeader: AuthHeaderFactory) {
protected constructor(options: HTTPClientOptions, mkAuthHeader: MkBaseHeaders) {
this.#applicationToken = options.applicationToken;
this.baseUrl = options.baseUrl;
this.emitter = options.emitter;
Expand Down
2 changes: 1 addition & 1 deletion src/api/clients/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export interface HTTPClientOptions {
/**
* @internal
*/
export type AuthHeaderFactory = (token: string) => Record<string, any>;
export type MkBaseHeaders = (token: string) => Record<string, any>;

/**
* @internal
Expand Down
5 changes: 5 additions & 0 deletions src/api/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ export const DEFAULT_NAMESPACE = 'default_keyspace';
*/
export const DEFAULT_TIMEOUT = 30000;

/**
* @internal
*/
export const DEFAULT_EMBEDDING_API_KEY_HEADER = 'x-embedding-api-key';

/**
* @internal
*/
Expand Down
7 changes: 4 additions & 3 deletions src/data-api/collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ import { FindOneAndDeleteCommand } from '@/src/data-api/types/find/find-one-dele
import { FindOneAndUpdateCommand } from '@/src/data-api/types/find/find-one-update';
import { InsertManyCommand } from '@/src/data-api/types/insert/insert-many';
import { Mutable } from '@/src/data-api/types/utils';
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';

/**
* Represents the interface to a collection in the database.
Expand Down Expand Up @@ -118,19 +119,19 @@ export class Collection<Schema extends SomeDoc = SomeDoc> {
*
* @internal
*/
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined) {
constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined) {
Object.defineProperty(this, 'collectionName', {
value: name,
writable: false,
});

Object.defineProperty(this, 'namespace', {
value: namespace ?? db.namespace,
value: opts?.namespace ?? db.namespace,
writable: false,
});

Object.defineProperty(this, '_httpClient', {
value: httpClient.withCollection(this.namespace, this.collectionName),
value: httpClient.forCollection(this.namespace, this.collectionName, opts?.embeddingApiKey),
enumerable: false,
});

Expand Down
5 changes: 3 additions & 2 deletions src/data-api/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { extractDbIdFromUrl, validateOption } from '@/src/data-api/utils';
import { CreateCollectionCommand } from '@/src/data-api/types/collections/create-collection';
import { ListCollectionsCommand } from '@/src/data-api/types/collections/list-collection';
import { InternalRootClientOpts } from '@/src/client/types';
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';

/**
* Represents an interface to some Astra database instance. This is the entrypoint for database-level DML, such as
Expand Down Expand Up @@ -244,8 +245,8 @@ export class Db {
* @see SomeDoc
* @see VectorDoc
*/
public collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: WithNamespace): Collection<Schema> {
return new Collection<Schema>(this, this._httpClient, name, options?.namespace);
public collection<Schema extends SomeDoc = SomeDoc>(name: string, options?: CollectionSpawnOptions): Collection<Schema> {
return new Collection<Schema>(this, this._httpClient, name, options);
}

/**
Expand Down
12 changes: 12 additions & 0 deletions src/data-api/types/collections/collections-common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ export interface VectorizeServiceOptions {
* @alpha
*/
modelName: string,
/**
* NOTE: This feature is under current development.
*
* @alpha
*/
authentication?: Record<string, string | undefined>,
/**
* NOTE: This feature is under current development.
*
* @alpha
*/
parameters?: Record<string, unknown>,
}

/**
Expand Down
9 changes: 6 additions & 3 deletions src/data-api/types/collections/command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import { WithNamespace } from '@/src/data-api';
import { WithTimeout } from '@/src/common';

/**
Expand All @@ -25,9 +24,13 @@ import { WithTimeout } from '@/src/common';
*
* @public
*/
export interface RunCommandOptions extends WithNamespace, WithTimeout {
export interface RunCommandOptions extends WithTimeout {
/**
* The collection to run the command on. If not provided, the command is run on the database.
*/
collection?: string
collection?: string,
/**
* The namespace (aka keyspace) to use for the db operation.
*/
namespace?: string | null,
}
5 changes: 3 additions & 2 deletions src/data-api/types/collections/create-collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

import { SomeDoc, WithNamespace } from '@/src/data-api';
import { SomeDoc } from '@/src/data-api';
import { CollectionOptions } from '@/src/data-api/types';
import { WithTimeout } from '@/src/common/types';
import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection';

/** @internal */
export interface CreateCollectionCommand {
Expand All @@ -38,7 +39,7 @@ export interface CreateCollectionCommand {
*
* @public
*/
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, WithNamespace {
export interface CreateCollectionOptions<Schema extends SomeDoc> extends WithTimeout, CollectionOptions<Schema>, CollectionSpawnOptions {
/**
* If `true` or unset, runs an additional existence check before creating the collection, failing if the collection
* with the same name already exists, raising a {@link CollectionAlreadyExistsError}.
Expand Down
Loading

0 comments on commit ebc875d

Please sign in to comment.