diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..6e3dc7d8
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,38 @@
+## Fill in real values and rename this file to .env before
+## running integration tests on your machine.
+
+## This should be your personal API key. These will get picked up
+## and used any time you run integration tests under
+## "poetry run pytest tests/integration"
+##
+## This key is also read and used to set up the pc client instance
+## when running "poetry run repl". This makes it easy to do
+## one-off manual testing.
+PINECONE_API_KEY=''
+
+## If you set this variable, you can also use the pcci client instance
+## when running "poetry run repl" in order to do cleanup/management
+## on the project used from CI.
+PINECONE_API_KEY_CI_TESTING=''
+
+## These headers get picked up and attached to every request by the code in
+## pinecone/config/pinecone_config.py
+##
+## The x-environment header is used to route requests to preprod. The value needs to be
+## a JSON string so it can be properly stored and read from an env var.
+PINECONE_ADDITIONAL_HEADERS='{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+
+## There's a bunch of tests in tests/integration/data/test_weird_ids.py
+## that we don't need to run most of the time. Only when refactoring the rat's nest
+## of generated code to ensure we haven't broken something subtle with string handling.
+SKIP_WEIRD=true
+
+## Some tests can run with either the Pinecone or PineconeGrpc client depending on
+## whether this value is set.
+USE_GRPC=false
+
+## When debugging, you may want to enable PINECONE_DEBUG_CURL to see some requests translated into
+## curl syntax. These are useful when reporting API issues to the backend team so they
+## can be reproduced without having to set up a python repro. WARNING: This output will
+## include the Api-Key header.
+# PINECONE_DEBUG_CURL='true'
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index e691e5fd..57ed758a 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -15,6 +15,7 @@ on:
       - '*.jpeg'
       - '*.gif'
       - '*.svg'
+      - '*.example'
   push:
     branches:
       - main
@@ -31,6 +32,7 @@ on:
       - '*.jpeg'
       - '*.gif'
       - '*.svg'
+      - '*.example'
   workflow_dispatch: {}

 concurrency:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c9d294bb..9db113f9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,9 +2,7 @@

 ## Installing development versions

-If you want to explore a potential code change, investigate
-a bug, or just want to try unreleased features, you can also install
-specific git shas.
+If you want to explore a potential code change, investigate a bug, or just want to try unreleased features, you can also install specific git shas.

 Some example commands:

@@ -16,20 +14,9 @@ pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@4
 poetry add git+https://github.com/pinecone-io/pinecone-python-client.git@44fc7ed
 ```

-
 ## Developing locally with Poetry

-[Poetry](https://python-poetry.org/) is a tool that combines [virtualenv](https://virtualenv.pypa.io/en/latest/) usage with dependency management, to provide a consistent experience for project maintainers and contributors who need to develop the pinecone-python-client
-as a library.
-
-A common need when making changes to the Pinecone client is to test your changes against existing Python code or Jupyter Notebooks that `pip install` the Pinecone Python client as a library.
- -Developers want to be able to see their changes to the library immediately reflected in their main application code, as well as to track all changes they make in git, so that they can be contributed back in the form of a pull request. - -The Pinecone Python client therefore supports Poetry as its primary means of enabling a consistent local development experience. This guide will walk you through the setup process so that you can: -1. Make local changes to the Pinecone Python client that are separated from your system's Python installation -2. Make local changes to the Pinecone Python client that are immediately reflected in other local code that imports the pinecone client -3. Track all your local changes to the Pinecone Python client so that you can contribute your fixes and feature additions back via GitHub pull requests +[Poetry](https://python-poetry.org/) is a tool that combines [virtualenv](https://virtualenv.pypa.io/en/latest/) usage with dependency management, to provide a consistent experience for project maintainers and contributors who need to develop the pinecone-python-client as a library. ### Step 1. Fork the Pinecone python client repository @@ -41,38 +28,13 @@ It will take a few seconds for your fork to be ready. When it's ready, **clone y Change directory into the repository, as we'll be setting up a virtualenv from within the root of the repository. -### Step 1. Install Poetry +### Step 2. Install Poetry Visit [the Poetry site](https://python-poetry.org/) for installation instructions. -To use the [Poetry `shell` command](https://python-poetry.org/docs/cli#shell), install the [`shell` plugin](https://github.com/python-poetry/poetry-plugin-shell). - -### Step 2. Install dependencies - -Run `poetry install` from the root of the project. - -### Step 3. Activate the Poetry virtual environment and verify success -Run `poetry shell` from the root of the project. At this point, you now have a virtualenv set up in this directory, which you can verify by running: +### Step 3. Install dependencies -`poetry env info` - -You should see something similar to the following output: - -```bash -Virtualenv -Python: 3.9.16 -Implementation: CPython -Path: /home/youruser/.cache/pypoetry/virtualenvs/pinecone-fWu70vbC-py3.9 -Executable: /home/youruser/.cache/pypoetry/virtualenvs/pinecone-fWu70vbC-py3.9/bin/python -Valid: True - -System -Platform: linux -OS: posix -Python: 3.9.16 -Path: /home/linuxbrew/.linuxbrew/opt/python@3.9 -``` -If you want to extract only the path to your new virtualenv, you can run `poetry env info --path` +Run `poetry install -E grpc -E asyncio` from the root of the project. ### Step 4. Enable pre-commit hooks. @@ -80,110 +42,38 @@ Run `poetry run pre-commit install` to enable checks to run when you commit so y ## Common tasks -### Running tests - -- Unit tests: `make test-unit` -- Integration tests: `PINECONE_API_KEY="YOUR API KEY" make test-integration` -- Run the tests in a single file: `poetry run pytest tests/unit/data/test_bulk_import.py -s -vv` - -### Running the ruff linter / formatter - -These should automatically trigger if you have enabled pre-commit hooks with `poetry run pre-commit install`. But in case you want to trigger these yourself, you can run them like this: - -``` -poetry run ruff check --fix # lint rules -poetry run ruff format # formatting -``` +### Debugging -If you want to adjust the behavior of ruff, configurations are in `pyproject.toml`. +See the [debugging guide](./docs/maintainers/debugging.md). 
If you find an issue and would like to report it as a github issue, make sure you do not leak your API key that may be included in debug outputs. +### Running tests -### Consuming API version upgrades +- Unit tests: `make test-unit` +- Run the tests in a single file: `poetry run pytest tests/unit/data/test_bulk_import.py` -These instructions can only be followed by Pinecone employees with access to our private APIs repository. +For more information on testing, see the [Testing guide](./docs/maintainers/testing-guide.md). External contributors should not worry about running integration tests as they make live calls to Pinecone and will incur significant costs. -Prerequisites: -- You must be an employee with access to private Pinecone repositories -- You must have [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed and running. Our code generation script uses a dockerized version of the OpenAPI CLI. -- You must have initialized the git submodules under codegen +### Running the type checker -```sh -git submodule -``` +If you are adding new code, you should make an effort to annotate it with [type hints](https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html). -To regenerate the generated portions of the client with the latest version of the API specifications, you need to have Docker Desktop running on your local machine. +You can run the type-checker to check for issues with: ```sh -./codegen/ -``` - - -## Loading your virtualenv in another shell - -It's a common need when developing against this client to load it as part of some other application or Jupyter Notebook code, modify -it directly, see your changes reflected immediately and also have your changes tracked in git so you can contribute them back. - -It's important to understand that, by default, if you open a new shell or terminal window, or, for example, a new pane in a tmux session, -your new shell will not yet reference the new virtualenv you created in the previous step. - -### Step 1. Get the path to your virtualenv - -We're going to first get the path to the virtualenv we just created, by running: - -```bash -poetry env info --path -``` - -You'll get a path similar to this one: `/home/youruser/.cache/pypoetry/virtualenvs/pinecone-fWu70vbC-py3.9/` - -### Step 2. Load your existing virtualenv in your new shell - -Within this path is a shell script that lives at `/bin/activate`. Importantly, you cannot simply run this script, but you -must instead source it like so: - -```bash -source /home/youruser/.cache/pypoetry/virtualenvs/pinecone-fWu70vbC-py3.9/bin/activate -``` -In the above example, ensure you're using your own virtualenv path as returned by `poetry env info --path`. - -### Step 3. Test out your virtualenv - -Now, we can test that our virtualenv is working properly by adding a new test module and function to the `pinecone` client within our virtualenv -and running it from the second shell. - -#### Create a new test file in pinecone-python-client -In the root of your working directory of the `pinecone-python-client` where you first ran `poetry shell`, add a new file named `hello_virtualenv.py` under the `pinecone` folder. - -In that file write the following: - -```python -def hello(): - print("Hello, from your virtualenv!") +poetry run mypy pinecone ``` -Save the file. 
-#### Create a new test file in your second shell
-This step demonstrates how you can immediately test your latest Pinecone client code from any local Python application or Jupyter Notebook:
-
-In your second shell, where you ran `source` to load your virtualenv, create a python file named `test.py` and write the following:
+### Running the ruff linter / formatter

-```python
-from pinecone import hello_virtualenv
+These should automatically trigger if you have enabled pre-commit hooks with `poetry run pre-commit install`. But in case you want to trigger these yourself, you can run them like this:

-hello_virtualenv.hello()
 ```
-
-Save the file. Run it with your Python binary. Depending on your system, this may either be `python` or `python3`:
-
-```bash
-python3 test.py
+poetry run ruff check --fix # lint rules
+poetry run ruff format # formatting
 ```

-You should see the following output:
+If you experience any issues please [file a new issue](https://github.com/pinecone-io/pinecone-python-client/issues/new).

-```bash
-❯ python3 test.py
-Hello, from your virtualenv!
-```
+### Submitting a Pull Request

-If you experience any issues please [file a new issue](https://github.com/pinecone-io/pinecone-python-client/issues/new).
+Once you have a change in your fork that you feel good about, confirm that the unit tests, ruff checks, and mypy type checks all pass, then submit a [Pull Request](https://github.com/pinecone-io/pinecone-python-client/compare). All code contributed to the pinecone-python-client repository is licensed under the [Apache 2.0 license](./LICENSE.txt).
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
new file mode 100644
index 00000000..eb9630bb
--- /dev/null
+++ b/MAINTAINERS.md
@@ -0,0 +1,96 @@
+# Maintainers
+
+This guide is aimed primarily at Pinecone employees working on maintaining and developing the Python SDK.
+
+## Setup
+
+### 1. Clone the repo
+
+```sh
+git clone git@github.com:pinecone-io/pinecone-python-client.git
+```
+
+### 2. Install Poetry
+
+Visit [the Poetry site](https://python-poetry.org/docs/#installation) for installation instructions.
+
+### 3. Install dependencies
+
+Run this from the root of the project.
+
+```sh
+poetry install -E grpc -E asyncio
+```
+
+The `grpc` and `asyncio` extras are optional for users of the SDK, but you will need them installed in order to develop on those parts of the codebase.
+
+### 4. Enable pre-commit hooks
+
+Run `poetry run pre-commit install` to enable checks to run when you commit so you don't have to find out during your CI run that minor lint issues need to be addressed.
+
+### 5. Set up environment variables
+
+Some tests require environment variables to be set in order to run.
+
+```sh
+cp .env.example .env
+```
+
+After copying the template, you will need to fill in your secrets. `.env` is in `.gitignore`, so there's no concern about accidentally committing your secrets.
+
+### Testing
+
+There is a lot to say about testing the Python SDK. See the [testing guide](./docs/maintainers/testing-guide.md).
+
+### Debugging
+
+See the [debugging guide](./docs/maintainers/debugging.md).
+
+
+### Preparing a new release candidate (for major release / API version bump)
+
+These instructions can only be followed by Pinecone employees with access to our private APIs repository.
+
+Prerequisites:
+- You must be an employee with access to private Pinecone repositories
+- You must have [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed and running. Our code generation script uses a dockerized version of the OpenAPI CLI.
+- You must have initialized the git submodules under `codegen/` (e.g. with `git submodule update --init --recursive`)
+
+First create a prerelease branch where all the work for the upcoming release will be integrated. For example, for the 2025-04 API release I worked off of this branch:
+
+```
+git checkout main
+git pull
+git checkout -b release-candidate/2025-04
+git push origin release-candidate/2025-04
+```
+
+The release-candidate branch is where we will integrate all changes for an upcoming release, which may include work from many different PRs and commits, so we can push it up immediately with no changes.
+
+Next, to regenerate the generated parts of the SDK using a new API spec, I make a second branch to hold my changes:
+
+```sh
+git checkout -b jhamon/regen-2025-04
+```
+
+Then you run the build script, passing the API version as an argument, like this:
+
+```sh
+./codegen/build-oas.sh 2025-07
+```
+
+For grpc updates, it's a similar story:
+
+```sh
+./codegen/build-grpc.sh 2025-07
+```
+
+Commit the generated files, which should mainly be placed under `pinecone/core`. Commit the sha changes in the git submodule at `codegen/apis`.
+
+Run the type check with `poetry run mypy pinecone`. This will usually surface breaking changes as a result of things being renamed or modified.
+
+Push your branch (`git push origin jhamon/regen-2025-04` in this example) and open a PR **against the RC branch** (in this example `release-candidate/2025-04`). This will allow the full PR test suite to kick off and help you discover what other changes you need to make.
+
+### Releasing
+
+See the [release instructions](./docs/maintainers/releasing.md).
diff --git a/docs/maintainers/debugging.md b/docs/maintainers/debugging.md
new file mode 100644
index 00000000..f5cc7501
--- /dev/null
+++ b/docs/maintainers/debugging.md
@@ -0,0 +1,146 @@
+# Debugging the Pinecone SDK
+
+## Enabling debug logging for REST, asyncio
+
+You can turn on detailed debug logging if needed, but it's a little bit challenging because it's not currently exposed to the user in a nice way. You have to reach into the internals a bit after the client is instantiated to see everything.
+
+> [!WARNING]
+> Be careful with this output as it will leak headers with secrets, including the `Api-Key` header. I manually redacted that value from this example below.
+ +If I defined a script like this in a file `scripts/repro.py`: + +```python +import dotenv +import logging +from pinecone import Pinecone + +dotenv.load_dotenv() + +logging.basicConfig( + level=logging.DEBUG, + format="%(levelname)-8s | %(name)s:%(lineno)d | %(message)s" +) + +pc = Pinecone() +pc._openapi_config.debug = True +pc.describe_index('jen') +``` + +Running it with `poetry run python3 scripts/repro.py` would give output like + +``` +DEBUG | pinecone.openapi_support.rest_urllib3:125 | Calling urllib3 request() +send: b'GET /indexes/jen HTTP/1.1\r\nHost: api.pinecone.io\r\nAccept-Encoding: identity\r\nAccept: application/json\r\nUser-Agent: python-client-6.0.2\r\nx-environment: preprod-aws-0\r\nX-Pinecone-API-Version: 2025-04\r\nApi-Key: REDACTEDX\r\n\r\n' +reply: 'HTTP/1.1 200 OK\r\n' +header: content-type: application/json +header: access-control-allow-origin: * +header: vary: origin,access-control-request-method,access-control-request-headers +header: access-control-expose-headers: * +header: x-pinecone-api-version: 2025-04 +header: x-cloud-trace-context: ac668597d0413fd780f6d9536f80195b +header: date: Wed, 21 May 2025 16:48:08 GMT +header: server: Google Frontend +header: Content-Length: 263 +header: Via: 1.1 google +header: Alt-Svc: h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 +DEBUG | urllib3.connectionpool:546 | https://api.pinecone.io:443 "GET /indexes/jen HTTP/11" 200 0 +DEBUG | pinecone.openapi_support.rest_urllib3:265 | response body: b'{"name":"jen","vector_type":"dense","metric":"cosine","dimension":2,"status":{"ready":true,"state":"Ready"},"host":"jen-dojoi3u.svc.preprod-aws-0.pinecone.io","spec":{"serverless":{"region":"us-east-1","cloud":"aws"}},"deletion_protection":"disabled","tags":null}' +DEBUG | pinecone.openapi_support.rest_utils:34 | response status: 200 +{ + "name": "jen", + "metric": "cosine", + "host": "jen-dojoi3u.svc.preprod-aws-0.pinecone.io", + "spec": { + "serverless": { + "cloud": "aws", + "region": "us-east-1" + } + }, + "status": { + "ready": true, + "state": "Ready" + }, + "vector_type": "dense", + "dimension": 2, + "deletion_protection": "disabled", + "tags": null +} +``` + +## Enabling debug logging for GRPC + +Debug output for GRPC is controlled with [environment variables](https://github.com/grpc/grpc/blob/master/doc/environment_variables.md). Set `GRPC_TRACE='all'`. + +## Using breakpoints + +Python has a built-in debugger called [pdb](https://docs.python.org/3/library/pdb.html). + +Basic usage involves inserting a call to `breakpoint()` into your program. This will halt when reached during execution and drop you into a REPL that allows you to explore the local variables at that point of the execution. + +Once you're in the pdb session, you can inspect variables, advance line by line using `next`, or resume execution using `continue`. This can be a really useful technique for getting to the bottom of a problem when working on a complex integration test or doing manual testing in the repl. + +A useful spot to insert the `breakpoint()` invocation is inside the `request` method of the `Urllib3RestClient` or `AiohttpRestClient` classes. After making an edit to insert a `breakpoint()` invocation in my request method, I can inspect the request params like this: + +```sh +poetry run repl + + Welcome to the custom Python REPL! + Your initialization steps have been completed. 
+
+  Two Pinecone objects are available:
+  - pc: Built using the PINECONE_API_KEY env var, if set
+  - pcci: Built using the PINECONE_API_KEY_CI_TESTING env var, if set
+
+  You can use the following functions to clean up the environment:
+  - delete_all_indexes(pc)
+  - delete_all_pod_indexes(pc)
+  - delete_all_collections(pc)
+  - delete_all_backups(pc)
+  - cleanup_all(pc)
+
+>>> pc.describe_index('jen')
+> /Users/jhamon/workspace/pinecone-python-client/pinecone/openapi_support/rest_urllib3.py(127)request()
+-> method = method.upper()
+(Pdb) method
+'GET'
+(Pdb) url
+'https://api.pinecone.io/indexes/jen'
+(Pdb) next
+> /Users/jhamon/workspace/pinecone-python-client/pinecone/openapi_support/rest_urllib3.py(128)request()
+-> assert method in ["GET", "HEAD", "DELETE", "POST", "PUT", "PATCH", "OPTIONS"]
+(Pdb) next
+> /Users/jhamon/workspace/pinecone-python-client/pinecone/openapi_support/rest_urllib3.py(130)request()
+-> if os.environ.get("PINECONE_DEBUG_CURL"):
+(Pdb) next
+> /Users/jhamon/workspace/pinecone-python-client/pinecone/openapi_support/rest_urllib3.py(158)request()
+-> if post_params and body:
+(Pdb) continue
+{
+  "name": "jen",
+  "metric": "cosine",
+  "host": "jen-dojoi3u.svc.preprod-aws-0.pinecone.io",
+  "spec": {
+    "serverless": {
+      "cloud": "aws",
+      "region": "us-east-1"
+    }
+  },
+  "status": {
+    "ready": true,
+    "state": "Ready"
+  },
+  "vector_type": "dense",
+  "dimension": 2,
+  "deletion_protection": "disabled",
+  "tags": null
+}
+```
+
+## Reporting errors to backend teams
+
+Sometimes errors are caused by unexpected behavior in the underlying API. Once you have confirmed this is the case, you need to convey that information to the appropriate backend teams in a concise way that removes any doubt about whether the SDK is to blame.
+
+You can set the environment variable `PINECONE_DEBUG_CURL='true'` to see some printed output approximating the requests the REST client makes, translated into curl calls. This is useful for reporting API problems in a way that is copy/pasteable to backend teams for easy reproducibility without all the hassle of setting up a Python notebook to repro. Be aware that this output will leak your API key.
+
+> [!WARNING]
+> Output from `PINECONE_DEBUG_CURL='true'` will include your secret API key. Do not use it in production environments and be careful when sharing the output.
diff --git a/docs/maintainers/release-workflow.png b/docs/maintainers/release-workflow.png
new file mode 100644
index 00000000..4279a174
Binary files /dev/null and b/docs/maintainers/release-workflow.png differ
diff --git a/docs/maintainers/releasing.md b/docs/maintainers/releasing.md
new file mode 100644
index 00000000..38245c62
--- /dev/null
+++ b/docs/maintainers/releasing.md
@@ -0,0 +1,44 @@
+# Releasing the Pinecone SDK
+
+The Pinecone SDK is published to PyPI as `pinecone`.
+
+The release process is mostly automated by a pair of GitHub Actions workflows that are triggered using the "Run workflow" button in the GitHub UI.
+
+![Release workflow](./release-workflow.png)
+
+## Dev Builds / Release Candidates
+
+For these, you will use the [PyPI Release: Pre-Release (pinecone)](https://github.com/pinecone-io/pinecone-python-client/actions/workflows/alpha-release.yaml) GitHub workflow.
+
+This workflow:
+- Increments the version number based on your selection in the dropdown menu
+- Builds and uploads the release to PyPI
+
+Notably, the version number change is not committed or tagged and tests are not run.
+
+## Releasing
+
+For "real" releases (i.e. those without a `.rc` or `.dev` suffix applied to the release number), we use the [PyPI Release: Production (pinecone)](https://github.com/pinecone-io/pinecone-python-client/actions/workflows/release.yaml) workflow.
+
+Prior to running the release workflow, you should ensure:
+- If this is a major release, you should have merged the work from your release-candidate branch into main
+- The last test run on main was green
+- There are no unmerged PRs you expected to include in the release
+- You have prepared some draft release notes so you can publish them right away once the artifact is available on PyPI
+- Other stakeholders (docs, marketing, etc.) are ready for a new release to go out
+
+This workflow:
+- Executes all tests
+- Increments the version number based on your selection in the dropdown menu
+- Commits the version number change
+- Tags the commit with a git tag for the version number
+- Builds and uploads the release to PyPI
+- Pushes the commit and tags to `main` so they are persisted
+
+Once the release is out, you need to [publish release notes](https://github.com/pinecone-io/pinecone-python-client/releases/tag/v7.0.0).
+
+Publishing these release notes will result in a notification being published to the internal Slack. This is what lets other stakeholders (docs, devrel, field engineering, etc.) know a new release is available.
+
+## Yanking a bad release
+
+If you ship something that is later discovered to have some sort of catastrophic issue, it is possible to yank a bad release from PyPI. For this, find our PyPI credentials in the engineering vault in 1Password.
diff --git a/docs/maintainers/testing-guide.md b/docs/maintainers/testing-guide.md
new file mode 100644
index 00000000..c151d958
--- /dev/null
+++ b/docs/maintainers/testing-guide.md
@@ -0,0 +1,200 @@
+# Testing the Pinecone SDK
+
+We have a lot of different types of tests in this repository. At a high level, they are structured like this:
+
+```
+tests
+├── dependency
+├── integration
+├── perf
+├── unit
+├── unit_grpc
+└── upgrade
+```
+
+- `dependency`: These tests are a set of very minimal end-to-end integration tests that ensure basic functionality works to upsert and query vectors from an index. These are rarely run locally; we use them in CI to confirm the client can be used when installed with a large matrix of different Python versions and versions of key dependencies. See [`.github/workflows/testing-dependency.yaml`](https://github.com/pinecone-io/pinecone-python-client/blob/main/.github/workflows/testing-dependency.yaml) for more details on how these are run.
+
+- `integration`: These are a large suite of end-to-end integration tests exercising most of the core functions of the product. They are slow and expensive to run, but they give the greatest confidence the SDK actually works end-to-end. See notes below on how to set up the required configuration and run individual tests if you are iterating on a bug or feature and want to get more rapid feedback than running the entire suite in CI will give you. In CI, these are run using [`.github/workflows/testing-integration.yaml`](https://github.com/pinecone-io/pinecone-python-client/blob/main/.github/workflows/testing-integration.yaml).
+
+- `perf`: These tests are still being developed, but eventually they will play an important role in making sure we don't regress on client performance when building new features.
+
+- `unit` and `unit_grpc`: These are what you would probably expect. Unit-testing makes up a relatively small portion of our testing because there's not that much business logic that makes sense to test in isolation. But it is occasionally useful when doing some sort of data conversions with many edge cases (e.g. `VectorFactory`) or merging results (e.g. `QueryResultsAggregator`) to write some unit tests; a tiny illustrative sketch is shown after this list. If you have a situation where unit testing is appropriate, they are really great to work with because they are fast and don't have any external dependencies. In CI, these are run with the [`.github/workflows/testing-unit.yaml`](https://github.com/pinecone-io/pinecone-python-client/blob/main/.github/workflows/testing-unit.yaml) workflow.
+
+- `upgrade`: These are also still being developed, and if you are reading this guide you probably don't need to worry about them. The goal of these is to ensure we're not introducing breaking changes without realizing it.
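+
+The sketch below gives a feel for the kind of test that lives in `tests/unit`. It is illustrative only (the test name and assertions are not taken from the real suite), though the `Vector` model it uses is part of the public `pinecone` package:
+
+```python
+from pinecone import Vector
+
+
+class TestVectorModel:
+    def test_vector_fields_roundtrip(self):
+        # Build a Vector directly and check the fields survive construction.
+        v = Vector(id="vec-1", values=[0.1, 0.2], metadata={"genre": "comedy"})
+        assert v.id == "vec-1"
+        assert v.values == [0.1, 0.2]
+        assert v.metadata == {"genre": "comedy"}
+```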
+
+## Running the ruff linter / formatter
+
+These should automatically trigger if you have enabled pre-commit hooks with `poetry run pre-commit install`. But in case you want to trigger these yourself, you can run them like this:
+
+```sh
+poetry run ruff check --fix # lint rules
+poetry run ruff format # formatting
+```
+
+If you want to adjust the behavior of ruff, configurations are in `pyproject.toml`.
+
+## Running the type checker
+
+If you are adding new code, you should make an effort to annotate it with [type hints](https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html).
+
+You can run the type-checker to check for issues with:
+
+```sh
+poetry run mypy pinecone
+```
+
+## Automated tests
+
+### Running unit tests
+
+Unit-testing makes up a relatively small portion of our testing because there's not that much business logic that makes sense to test in isolation. But it is occasionally useful when doing some sort of data conversions with many edge cases (e.g. `VectorFactory`) or merging results (e.g. `QueryResultsAggregator`) to write some unit tests.
+
+Unit tests do not automatically read environment variables in the `.env` file because some of the tests relate to parsing values from environment variables and we don't want values in our `.env` file to impact how these tests execute.
+
+To run them:
+
+- For REST: `poetry run pytest tests/unit`
+- For GRPC: `poetry run pytest tests/unit_grpc`
+
+If you want to set an environment variable anyway, you can do it by prefixing the test command with it inline, e.g. `FOO='bar' poetry run pytest tests/unit`.
+
+### Running integration tests
+
+Integration tests make real calls to Pinecone. They are slow but give the highest level of confidence the client is actually working end to end. **In general, only Pinecone employees should run these because the cost of creating the underlying resources can be quite significant, particularly if errors occur and some resources are not cleaned up properly.**
+
+For these tests, you need to make sure you've set values inside of an `.env` file (see `.env.example` for more information). These will be read using `dotenv` when tests are run (see the sketch below).
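+
+To make that concrete, here is a minimal sketch of the kind of thing a `conftest.py` can do with those values. The real conftest files under `tests/integration` are more involved, and the fixture here is illustrative rather than copied from the repo:
+
+```python
+import os
+
+import dotenv
+import pytest
+from pinecone import Pinecone
+from pinecone.grpc import PineconeGRPC
+
+# Load PINECONE_API_KEY, USE_GRPC, etc. from the .env file before tests run.
+dotenv.load_dotenv()
+
+
+@pytest.fixture(scope="session")
+def client():
+    # Shared data plane tests can target either implementation via USE_GRPC.
+    if os.environ.get("USE_GRPC") == "true":
+        return PineconeGRPC(api_key=os.environ["PINECONE_API_KEY"])
+    return Pinecone(api_key=os.environ["PINECONE_API_KEY"])
+```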
+
+I never run all of these locally in one shot because it would take too long and is generally unnecessary; in CI, the tests are broken up across many different jobs so they can run in parallel and minimize the amount of retesting when a failure results in the entire build being re-run. If I see one or a few tests broken in CI, I will run just those tests locally while iterating on the fix:
+
+- Run the tests for a specific part of the SDK (example: index): `poetry run pytest tests/integration/control/resources/index`
+- Run the tests in a single file: `poetry run pytest tests/integration/control/resources/index/test_create.py`
+- Run a single test: `poetry run pytest tests/integration/control/resources/index/test_list.py::TestListIndexes::test_list_indexes_includes_ready_indexes`
+
+### Fixtures and other test configuration
+
+Many values are read from environment variables (from `.env`) or set in CI workflows such as `.github/workflows/testing-integration.yaml`.
+
+At the level of the testing framework, a lot of test fixtures as well as setup & cleanup tasks take place in special files called `conftest.py`. This file name has [special significance](https://docs.pytest.org/en/stable/reference/fixtures.html#conftest-py-sharing-fixtures-across-multiple-files) to pytest and your fixtures won't be loaded if you misspell the name of the file, so be careful if you are setting up a new group of tests that need a `conftest.py` file.
+
+Within a conftest file, a fixture can be defined like this with the `@pytest.fixture` decorator:
+
+```python
+import pytest
+
+@pytest.fixture()
+def foo(request):
+    return "FOO"
+```
+
+Then in the test file, you can refer to the fixture by name in the parameters to your test function:
+
+```python
+class TestMyExample:
+    def test_foo(self, foo):
+        assert foo == "FOO"
+```
+
+This is a highly contrived example, but we use this technique to access test configuration controlled with environment variables and resources that have heavy setup & cleanup cost (e.g. spinning up indexes) that we want to manage in one place rather than duplicating those steps in many places throughout the codebase.
+
+### Testing data plane: REST vs GRPC vs Asyncio
+
+Integration tests for the data plane (i.e. `poetry run pytest tests/integration/data`) are reused for both the REST and GRPC client variants since the interfaces of these different client implementations are nearly identical (other than `async_req=True` responses). To toggle how they are run, set `USE_GRPC='true'` in your `.env` before running.
+
+There are a relatively small number of tests which are not shared, usually related to futures when using GRPC with `async_req=True`. We use `@pytest.mark.skipif` to control whether these are run or not.
+
+```python
+class TestDeleteFuture:
+    @pytest.mark.skipif(
+        os.getenv("USE_GRPC") != "true", reason="PineconeGrpcFutures only returned from grpc client"
+    )
+    def test_delete_future(self, idx):
+        # ... test implementation
+```
+
+Asyncio tests of the data plane are unfortunately separate because there are quite a few differences in how you interact with the asyncio client. So those tests are found in a different directory, `tests/integration/data_asyncio`.
+
+## Manual testing
+
+### With an interactive REPL
+
+You can access a Python REPL that is preloaded with the virtualenv maintained by Poetry (including all dependencies declared in `pyproject.toml`), any changes you've made to the code in `pinecone/`, the environment variables set in your `.env` file, and a few useful variables and functions defined in [`scripts/repl.py`](https://github.com/pinecone-io/pinecone-python-client/blob/main/scripts/repl.py):
+
+```sh
+$ poetry run repl
+
+  Welcome to the custom Python REPL!
+  Your initialization steps have been completed.
+ + Two Pinecone objects are available: + - pc: Built using the PINECONE_API_KEY env var, if set + - pcci: Built using the PINECONE_API_KEY_CI_TESTING env var, if set + + You can use the following functions to clean up the environment: + - delete_all_indexes(pc) + - delete_all_pod_indexes(pc) + - delete_all_collections(pc) + - delete_all_backups(pc) + - cleanup_all(pc) + +>>> pc.describe_index(name='jen') +{ + "name": "jen", + "metric": "cosine", + "host": "jen-dojoi3u.svc.preprod-aws-0.pinecone.io", + "spec": { + "serverless": { + "cloud": "aws", + "region": "us-east-1" + } + }, + "status": { + "ready": true, + "state": "Ready" + }, + "vector_type": "dense", + "dimension": 2, + "deletion_protection": "disabled", + "tags": null +} +``` + +### Investigating module import performance + +We don't have automated tests for this, but if you want to do some one-off testing to check on how efficiently the package can be imported and initialized, you can run code like this: + +```sh +poetry run python3 -X importtime -c 'from pinecone import Pinecone; pc = Pinecone(api_key="foo")' 2> import_time.log +``` + +And then inspect the results with a visualization tool called tuna. + +```sh +poetry run tuna import_time.log +``` + +This is a useful thing to do when you are introducing new classes or plugins to ensure you're not causing a performance regression on imports. + +### Installing SDK WIP in another project on your machine + +pip, poetry, and similar tools know how to install from local files. This can sometimes be useful to validate a change or bugfix. + +If your local files look like this: + +``` +workspace +├── pinecone-python-client/ +└── repro_project/ +``` + +You should be able to test changes in your repro project by doing something like + +```sh +cd repro_project + +# With poetry +poetry add ../pinecone-python-client + +# With pip3 +pip3 install ../pinecone-python-client +``` diff --git a/scripts/repl.py b/scripts/repl.py index 82d5ce26..79edae53 100644 --- a/scripts/repl.py +++ b/scripts/repl.py @@ -15,6 +15,7 @@ def main(): # - Loading configuration files dotenv.load_dotenv() + logging.basicConfig( level=logging.DEBUG, format="%(levelname)-8s | %(name)s:%(lineno)d | %(message)s" ) @@ -35,11 +36,12 @@ def main(): Your initialization steps have been completed. 
Two Pinecone objects are available: - - pc: Interact with the one-offs project - - pcci: Interact with the pinecone-python-client project (CI testing) + - pc: Built using the PINECONE_API_KEY env var, if set + - pcci: Built using the PINECONE_API_KEY_CI_TESTING env var, if set You can use the following functions to clean up the environment: - delete_all_indexes(pc) + - delete_all_pod_indexes(pc) - delete_all_collections(pc) - delete_all_backups(pc) - cleanup_all(pc) @@ -49,6 +51,20 @@ def main(): # slow down the rate of requests sleep_interval = 30 + def delete_all_pod_indexes(pc): + for index in pc.db.index.list(): + if index.spec.pod is not None: + logger.info(f"Deleting index {index.name}") + try: + if index.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index {index.name}") + pc.db.index.configure(name=index.name, deletion_protection="disabled") + time.sleep(sleep_interval) + pc.db.index.delete(name=index.name) + time.sleep(sleep_interval) + except Exception as e: + logger.error(f"Error deleting index {index.name}: {e}") + def delete_all_indexes(pc): for index in pc.db.index.list(): logger.info(f"Deleting index {index.name}") @@ -56,6 +72,7 @@ def delete_all_indexes(pc): if index.deletion_protection == "enabled": logger.info(f"Disabling deletion protection for index {index.name}") pc.db.index.configure(name=index.name, deletion_protection="disabled") + time.sleep(sleep_interval) pc.db.index.delete(name=index.name) time.sleep(sleep_interval) except Exception as e: @@ -86,22 +103,38 @@ def cleanup_all(pc): # We want to route through preprod by default if os.environ.get("PINECONE_ADDITIONAL_HEADERS") is None: + logger.warning( + 'You have not set a value for PINECONE_ADDITIONAL_HEADERS in your .env file so the default value of {"x-environment": "preprod-aws-0"} will be used.' + ) os.environ["PINECONE_ADDITIONAL_HEADERS"] = '{"x-environment": "preprod-aws-0"}' # Create a custom namespace with any pre-loaded variables namespace = { "__name__": "__main__", "__doc__": None, - "pc": Pinecone(), - "pcci": Pinecone(api_key=os.environ.get("PINECONE_API_KEY_CI_TESTING")), "delete_all_indexes": delete_all_indexes, "delete_all_collections": delete_all_collections, "delete_all_backups": delete_all_backups, + "delete_all_pod_indexes": delete_all_pod_indexes, "cleanup_all": cleanup_all, - "pcl": Pinecone(host="http://localhost:8000"), + "pcl": Pinecone(api_key="foo", host="http://localhost:8000"), # Add any other variables you want to have available in the REPL } + if os.environ.get("PINECONE_API_KEY") is not None: + namespace["pc"] = Pinecone() + else: + logger.warning( + "You have not set a value for PINECONE_API_KEY in your .env file so the pc object was not pre-created for you. See .env.example for more information." + ) + + if os.environ.get("PINECONE_API_KEY_CI_TESTING") is not None: + namespace["pcci"] = Pinecone(api_key=os.environ.get("PINECONE_API_KEY_CI_TESTING")) + else: + logger.warning( + "You have not set a value for PINECONE_API_KEY_CI_TESTING in your .env file so the pcci object was not pre-created for you. See .env.example for more information." + ) + try: # Start the interactive console code.interact(banner=banner, local=namespace)