Skip to content

Commit 549360d

Browse files
refactor: add explicit ParameterSpec for embedding components
- Create crates/runtime/src/embeddings/params/ with per-provider ParameterSpec definitions for all 8 embedding providers (openai, azure, google, huggingface, databricks, bedrock, file, model2vec)
- Update try_to_embedding to use Parameters::try_new with explicit specs
- Update all provider function signatures from HashMap to Parameters
- Remove unused extract_secret macro from embed.rs

Closes spiceai#6755
1 parent 149a968 commit 549360d

11 files changed

Lines changed: 374 additions & 61 deletions

File tree

crates/runtime/src/embeddings/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616
pub mod common;
17+
// Per-provider `ParameterSpec` definitions for embedding components (added by this commit).
pub mod params;
1718
pub mod connector;
1819
pub mod execution_plan;
1920

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`AZURE_PARAMETERS`]; used as that array's length.
const AZURE_PARAM_LEN: usize = 5;
20+
21+
/// The Azure embedding parameter specs as a slice borrowed from [`AZURE_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &AZURE_PARAMETERS;
22+
23+
/// Parameter specifications for the Azure OpenAI embedding provider.
///
/// Covers the resource endpoint, API version, deployment name, and two
/// credentials (`api_key`, `entra_token`), both of which are marked `.secret()`.
pub(crate) const AZURE_PARAMETERS: [ParameterSpec; AZURE_PARAM_LEN] = [
24+
// NOTE(review): `endpoint` uses `ParameterSpec::runtime` while the remaining entries use
// `ParameterSpec::component`; the distinction is defined in `crate::parameters`
// (presumably whether the key carries the provider prefix) — confirm.
ParameterSpec::runtime("endpoint")
25+
.description("The Azure OpenAI resource endpoint, e.g., https://resource-name.openai.azure.com."),
26+
ParameterSpec::component("api_version")
27+
.description("The API version used for the Azure OpenAI service."),
28+
ParameterSpec::component("deployment_name")
29+
.description("The name of the model deployment."),
30+
ParameterSpec::component("api_key")
31+
.secret()
32+
.description("The Azure OpenAI API key."),
33+
ParameterSpec::component("entra_token")
34+
.secret()
35+
.description("The Azure Entra token for authentication."),
36+
];
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`BEDROCK_PARAMETERS`]; used as that array's length.
const BEDROCK_PARAM_LEN: usize = 5;
20+
21+
/// The Bedrock embedding parameter specs as a slice borrowed from [`BEDROCK_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &BEDROCK_PARAMETERS;
22+
23+
/// Parameter specifications for the Bedrock embedding provider.
///
/// All five entries are declared with `ParameterSpec::component`. Only
/// `normalize` declares a `one_of` value set (`"true"` / `"false"`); the other
/// entries declare no value constraint in this table.
pub(crate) const BEDROCK_PARAMETERS: [ParameterSpec; BEDROCK_PARAM_LEN] = [
24+
ParameterSpec::component("dimensions")
25+
.description("The number of dimensions for the embedding output."),
26+
ParameterSpec::component("normalize")
27+
.description("Whether to normalize the embedding output.")
28+
.one_of(&["true", "false"]),
29+
ParameterSpec::component("truncate_mode")
30+
.description("Truncation mode for input text that exceeds the model's token limit."),
31+
// Per their descriptions, the next two entries target specific model families
// (Cohere and Nova respectively).
ParameterSpec::component("input_type")
32+
.description("The input type for Cohere embedding models."),
33+
ParameterSpec::component("embedding_purpose")
34+
.description("The embedding purpose for Nova multimodal embedding models."),
35+
];
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`DATABRICKS_PARAMETERS`]; used as that array's length.
const DATABRICKS_PARAM_LEN: usize = 4;
20+
21+
/// The Databricks embedding parameter specs as a slice borrowed from [`DATABRICKS_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &DATABRICKS_PARAMETERS;
22+
23+
/// Parameter specifications for the Databricks embedding provider.
///
/// Declares the workspace endpoint plus two credential styles: an API `token`,
/// or a service-principal `client_id` / `client_secret` pair. `token` and
/// `client_secret` are marked `.secret()`.
pub(crate) const DATABRICKS_PARAMETERS: [ParameterSpec; DATABRICKS_PARAM_LEN] = [
24+
ParameterSpec::component("endpoint")
25+
.description("The Databricks workspace endpoint, e.g., dbc-a12cd3e4-56f7.cloud.databricks.com."),
26+
ParameterSpec::component("token")
27+
.secret()
28+
.description("The Databricks API token."),
29+
// NOTE(review): the descriptions below refer to `databricks_token`, while the spec above is
// named `token`; presumably `databricks_token` is the user-facing key once the component
// prefix is applied — confirm. The "cannot be used with" rule is not expressed in these
// specs, so it is presumably enforced where the parameters are consumed — verify.
ParameterSpec::component("client_id")
30+
.description("The Databricks Service Principal Client ID. Cannot be used with databricks_token."),
31+
ParameterSpec::component("client_secret")
32+
.secret()
33+
.description("The Databricks Service Principal Client Secret. Cannot be used with databricks_token."),
34+
];
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`FILE_PARAMETERS`]; used as that array's length.
const FILE_PARAM_LEN: usize = 2;
20+
21+
/// The file embedding parameter specs as a slice borrowed from [`FILE_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &FILE_PARAMETERS;
22+
23+
/// Parameter specifications for the `file` embedding provider.
///
/// Both entries are declared with `ParameterSpec::runtime`. `pooling` declares
/// a `one_of` value set (`cls`, `mean`, `splade`); `max_seq_length` declares no
/// value constraint in this table.
pub(crate) const FILE_PARAMETERS: [ParameterSpec; FILE_PARAM_LEN] = [
24+
ParameterSpec::runtime("pooling")
25+
.description("The pooling strategy for the embedding model.")
26+
.one_of(&["cls", "mean", "splade"]),
27+
ParameterSpec::runtime("max_seq_length")
28+
.description("The maximum sequence length for the embedding model."),
29+
];
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`GOOGLE_PARAMETERS`]; used as that array's length.
const GOOGLE_PARAM_LEN: usize = 2;
20+
21+
/// The Google embedding parameter specs as a slice borrowed from [`GOOGLE_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &GOOGLE_PARAMETERS;
22+
23+
/// Parameter specifications for the Google embedding provider.
///
/// Declares an `api_key` marked `.secret()` and an output `dimensions` setting,
/// both via `ParameterSpec::component`.
pub(crate) const GOOGLE_PARAMETERS: [ParameterSpec; GOOGLE_PARAM_LEN] = [
24+
ParameterSpec::component("api_key")
25+
.secret()
26+
.description("The Google API key."),
27+
ParameterSpec::component("dimensions")
28+
.description("The number of dimensions for the embedding output."),
29+
];
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`HF_PARAMETERS`]; used as that array's length.
const HF_PARAM_LEN: usize = 3;
20+
21+
/// The Hugging Face embedding parameter specs as a slice borrowed from [`HF_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &HF_PARAMETERS;
22+
23+
/// Parameter specifications for the Hugging Face embedding provider.
///
/// Declares an access token marked `.secret()` plus the `pooling` and
/// `max_seq_length` model settings. `pooling` declares a `one_of` value set
/// (`cls`, `mean`, `splade`).
pub(crate) const HF_PARAMETERS: [ParameterSpec; HF_PARAM_LEN] = [
24+
// NOTE(review): `hf_token` is declared via `ParameterSpec::component`; if component keys
// receive a provider prefix, verify the resulting user-facing key is the intended one.
ParameterSpec::component("hf_token")
25+
.secret()
26+
.description("The Hugging Face access token."),
27+
ParameterSpec::runtime("pooling")
28+
.description("The pooling strategy for the embedding model.")
29+
.one_of(&["cls", "mean", "splade"]),
30+
ParameterSpec::runtime("max_seq_length")
31+
.description("The maximum sequence length for the embedding model."),
32+
];
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// One submodule per embedding provider; `get_params_spec` below reads each
// submodule's `PARAMETERS` constant.
pub mod azure;
18+
pub mod bedrock;
19+
pub mod databricks;
20+
pub mod file;
21+
pub mod google;
22+
pub mod huggingface;
23+
pub mod model2vec;
24+
pub mod openai;
25+
26+
use spicepod::component::embeddings::EmbeddingPrefix;
27+
pub use crate::parameters::ParameterSpec;
28+
29+
/// Returns the parameter specifications for a given embedding source.
///
/// Each [`EmbeddingPrefix`] variant is mapped to the `PARAMETERS` constant of
/// the matching provider submodule, so the returned slice is `'static` and
/// nothing is allocated or computed per call.
30+
#[must_use]
31+
pub fn get_params_spec(source: &EmbeddingPrefix) -> &'static [ParameterSpec] {
32+
// No wildcard arm: every variant listed here is mapped explicitly to its provider table.
match source {
33+
EmbeddingPrefix::OpenAi => openai::PARAMETERS,
34+
EmbeddingPrefix::Azure => azure::PARAMETERS,
35+
EmbeddingPrefix::Google => google::PARAMETERS,
36+
EmbeddingPrefix::HuggingFace => huggingface::PARAMETERS,
37+
EmbeddingPrefix::Databricks => databricks::PARAMETERS,
38+
EmbeddingPrefix::Bedrock => bedrock::PARAMETERS,
39+
EmbeddingPrefix::File => file::PARAMETERS,
40+
EmbeddingPrefix::Model2Vec => model2vec::PARAMETERS,
41+
}
42+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`MODEL2VEC_PARAMETERS`]; used as that array's length.
const MODEL2VEC_PARAM_LEN: usize = 6;
20+
21+
/// The Model2Vec embedding parameter specs as a slice borrowed from [`MODEL2VEC_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &MODEL2VEC_PARAMETERS;
22+
23+
/// Parameter specifications for the Model2Vec embedding provider.
///
/// The first three entries (`hf_token`, `subfolder`, `normalize`) are declared
/// with `ParameterSpec::component`; the last three (`parallelism`,
/// `embed_max_token_length`, `embed_custom_batch_size`) with
/// `ParameterSpec::runtime`. `hf_token` is marked `.secret()` and `normalize`
/// declares a `one_of` value set (`"true"` / `"false"`).
pub(crate) const MODEL2VEC_PARAMETERS: [ParameterSpec; MODEL2VEC_PARAM_LEN] = [
24+
ParameterSpec::component("hf_token")
25+
.secret()
26+
.description("The Hugging Face access token."),
27+
ParameterSpec::component("subfolder")
28+
.description("The subfolder within the Hugging Face repo containing the model."),
29+
ParameterSpec::component("normalize")
30+
.description("Whether to normalize the embedding output.")
31+
.one_of(&["true", "false"]),
32+
// The numeric settings below declare no value constraint in this table.
// NOTE(review): presumably they are parsed and range-checked where consumed — confirm.
ParameterSpec::runtime("parallelism")
33+
.description("The number of threads to use for parallel inference."),
34+
ParameterSpec::runtime("embed_max_token_length")
35+
.description("The maximum token length for embedding input."),
36+
ParameterSpec::runtime("embed_custom_batch_size")
37+
.description("The custom batch size for embedding inference."),
38+
];
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
Copyright 2024-2025 The Spice.ai OSS Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
use crate::parameters::ParameterSpec;
18+
19+
/// Number of entries in [`OPENAI_PARAMETERS`]; used as that array's length.
const OPENAI_PARAM_LEN: usize = 5;
20+
21+
/// The OpenAI embedding parameter specs as a slice borrowed from [`OPENAI_PARAMETERS`].
pub const PARAMETERS: &[ParameterSpec] = &OPENAI_PARAMETERS;
22+
23+
/// Parameter specifications for the OpenAI embedding provider.
///
/// Two entries declare defaults: `endpoint` (`https://api.openai.com/v1`) and
/// `usage_tier` (`tier1`). `api_key` is marked `.secret()`, and `usage_tier`
/// declares a `one_of` value set (`free`, `tier1` through `tier5`).
pub(crate) const OPENAI_PARAMETERS: [ParameterSpec; OPENAI_PARAM_LEN] = [
24+
ParameterSpec::runtime("endpoint")
25+
.description("The OpenAI API base endpoint.")
26+
.default("https://api.openai.com/v1"),
27+
ParameterSpec::component("api_key")
28+
.secret()
29+
.description("The OpenAI API key."),
30+
ParameterSpec::component("org_id")
31+
.description("The OpenAI organization ID."),
32+
ParameterSpec::component("project_id")
33+
.description("The OpenAI project ID."),
34+
// The default `tier1` is one of the values listed in `one_of` below.
ParameterSpec::component("usage_tier")
35+
.description("The current usage tier for the OpenAI account: 'free', 'tier1'-'tier5'.")
36+
.one_of(&["free", "tier1", "tier2", "tier3", "tier4", "tier5"])
37+
.default("tier1"),
38+
];

0 commit comments

Comments
 (0)