Skip to content

Commit 88bd7ad

Browse files
DamienDeepgram and bd-g authored
Add Nova 3 and Keyterm support (#110)
* Add Nova 3 and Keyterm support --------- Co-authored-by: Brent George <[email protected]>
1 parent 6f56308 commit 88bd7ad

File tree

2 files changed

+139
-1
lines changed

2 files changed

+139
-1
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deepgram"
3-
version = "0.6.6"
3+
version = "0.6.7"
44
authors = ["Deepgram <[email protected]>"]
55
edition = "2021"
66
description = "Community Rust SDK for Deepgram's automated speech recognition APIs."

src/common/options.rs

+138
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub struct Options {
2626
search: Vec<String>,
2727
replace: Vec<Replace>,
2828
keywords: Vec<Keyword>,
29+
keyterms: Vec<String>,
2930
keyword_boost_legacy: Option<bool>,
3031
utterances: Option<Utterances>,
3132
tags: Vec<String>,
@@ -195,6 +196,17 @@ impl fmt::Display for Endpointing {
195196
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
196197
#[non_exhaustive]
197198
pub enum Model {
199+
/// Recommended for challenging audio.
200+
/// Recommended for most use cases.
201+
///
202+
/// Nova-3 expands on Nova-2's advancements with speech-specific
203+
/// optimizations to the underlying Transformer architecture, advanced
204+
/// data curation techniques, and a multi-stage training methodology.
205+
/// These changes yield reduced word error rate (WER) and enhancements
206+
/// to entity recognition (e.g. proper nouns, alphanumerics),
207+
/// punctuation, capitalization, and Keyterms.
208+
Nova3,
209+
198210
/// Recommended for readability and Deepgram's lowest word error rates.
199211
/// Recommended for most use cases.
200212
///
@@ -703,6 +715,7 @@ impl OptionsBuilder {
703715
search: Vec::new(),
704716
replace: Vec::new(),
705717
keywords: Vec::new(),
718+
keyterms: Vec::new(),
706719
keyword_boost_legacy: None,
707720
utterances: None,
708721
tags: Vec::new(),
@@ -1911,6 +1924,45 @@ impl OptionsBuilder {
19111924
self
19121925
}
19131926

1927+
/// Set the Keyterms feature.
1928+
///
1929+
/// Calling this when already set will append to the existing keyterms, not overwrite them.
1930+
///
1931+
/// See the [Deepgram Keyterms feature docs][docs] for more info.
1932+
///
1933+
/// [docs]: https://developers.deepgram.com/docs/keyterm
1934+
///
1935+
/// # Examples
1936+
///
1937+
/// ```
1938+
/// # use deepgram::common::options::Options;
1939+
/// #
1940+
/// let options = Options::builder()
1941+
/// .keyterms(["hello", "world"])
1942+
/// .build();
1943+
/// ```
1944+
///
1945+
/// ```
1946+
/// # use deepgram::common::options::Options;
1947+
/// #
1948+
/// let options1 = Options::builder()
1949+
/// .keyterms(["hello"])
1950+
/// .keyterms(["world"])
1951+
/// .build();
1952+
///
1953+
/// let options2 = Options::builder()
1954+
/// .keyterms(["hello", "world"])
1955+
/// .build();
1956+
///
1957+
/// assert_eq!(options1, options2);
1958+
/// ```
1959+
pub fn keyterms<'a>(mut self, keyterms: impl IntoIterator<Item = &'a str>) -> Self {
1960+
// `self.0` is the wrapped value that `build` hands back. Each borrowed `&str` is
// copied into an owned `String` and appended with `extend`, so keyterms added by
// earlier calls are kept and the insertion order is preserved.
self.0
1961+
.keyterms
1962+
.extend(keyterms.into_iter().map(String::from));
1963+
// Return the builder by value so further builder calls can be chained.
self
1964+
}
1965+
19141966
/// Finish building the [`Options`] object.
19151967
pub fn build(self) -> Options {
19161968
self.0
@@ -1958,6 +2010,7 @@ impl Serialize for SerializableOptions<'_> {
19582010
search,
19592011
replace,
19602012
keywords,
2013+
keyterms,
19612014
keyword_boost_legacy,
19622015
utterances,
19632016
tags,
@@ -2182,13 +2235,18 @@ impl Serialize for SerializableOptions<'_> {
21822235
seq.serialize_element(&("callback_method", callback_method.as_str()))?;
21832236
}
21842237

2238+
for element in keyterms {
2239+
seq.serialize_element(&("keyterm", element))?;
2240+
}
2241+
21852242
seq.end()
21862243
}
21872244
}
21882245

21892246
impl AsRef<str> for Model {
21902247
fn as_ref(&self) -> &str {
21912248
match self {
2249+
Self::Nova3 => "nova-3",
21922250
Self::Nova2 => "nova-2",
21932251
Self::Nova => "nova",
21942252
Self::Enhanced => "enhanced",
@@ -2985,4 +3043,84 @@ mod serialize_options_tests {
29853043
"paragraphs=true",
29863044
);
29873045
}
3046+
3047+
#[test]
3048+
// Pins the serialized form of the keyterms option by passing built `Options` values
// and their expected strings to `check_serialization` (defined outside this view;
// presumably it compares against the URL-encoded query string — confirm).
fn keyterms_serialization() {
3049+
// An empty keyterm list is expected to contribute nothing to the output.
check_serialization(&Options::builder().keyterms([]).build(), "");
3050+
3051+
// A single keyterm becomes one `keyterm=` pair.
check_serialization(
3052+
&Options::builder().keyterms(["hello"]).build(),
3053+
"keyterm=hello",
3054+
);
3055+
3056+
// Several keyterms become repeated `keyterm=` pairs, in the order they were given.
check_serialization(
3057+
&Options::builder().keyterms(["hello", "world"]).build(),
3058+
"keyterm=hello&keyterm=world",
3059+
);
3060+
3061+
// Test URL encoding of spaces
3062+
check_serialization(
3063+
&Options::builder().keyterms(["hello world"]).build(),
3064+
"keyterm=hello+world",
3065+
);
3066+
3067+
// Test with other features
// Note the expected string lists the keyterm pairs after `punctuate`, even though
// `.keyterms(..)` is called before `.punctuate(..)` on the builder.
3068+
check_serialization(
3069+
&Options::builder()
3070+
.model(Model::Nova3)
3071+
.language(Language::en)
3072+
.keyterms(["hello", "world"])
3073+
.punctuate(true)
3074+
.build(),
3075+
"model=nova-3&language=en&punctuate=true&keyterm=hello&keyterm=world",
3076+
);
3077+
3078+
// Test with multiple words per keyterm
3079+
check_serialization(
3080+
&Options::builder()
3081+
.keyterms(["hello world", "rust programming"])
3082+
.build(),
3083+
"keyterm=hello+world&keyterm=rust+programming",
3084+
);
3085+
}
3086+
3087+
#[test]
3088+
// Pins the serialized form of the keyterms option via `check_serialization`
// (defined outside this view).
// NOTE(review): every case in this test is identical to the ones in
// `keyterms_serialization` in this same module, so it adds no extra coverage.
// Consider removing one of the two, or repurposing this one (for example, to cover
// repeated `.keyterms(..)` calls appending to the list).
fn keyterms() {
3089+
// An empty keyterm list is expected to contribute nothing to the output.
check_serialization(&Options::builder().keyterms([]).build(), "");
3090+
3091+
// A single keyterm becomes one `keyterm=` pair.
check_serialization(
3092+
&Options::builder().keyterms(["hello"]).build(),
3093+
"keyterm=hello",
3094+
);
3095+
3096+
// Several keyterms become repeated `keyterm=` pairs, in the order they were given.
check_serialization(
3097+
&Options::builder().keyterms(["hello", "world"]).build(),
3098+
"keyterm=hello&keyterm=world",
3099+
);
3100+
3101+
// Test URL encoding of spaces
3102+
check_serialization(
3103+
&Options::builder().keyterms(["hello world"]).build(),
3104+
"keyterm=hello+world",
3105+
);
3106+
3107+
// Test with other features
// Note the expected string lists the keyterm pairs after `punctuate`, even though
// `.keyterms(..)` is called before `.punctuate(..)` on the builder.
3108+
check_serialization(
3109+
&Options::builder()
3110+
.model(Model::Nova3)
3111+
.language(Language::en)
3112+
.keyterms(["hello", "world"])
3113+
.punctuate(true)
3114+
.build(),
3115+
"model=nova-3&language=en&punctuate=true&keyterm=hello&keyterm=world",
3116+
);
3117+
3118+
// Test with multiple words per keyterm
3119+
check_serialization(
3120+
&Options::builder()
3121+
.keyterms(["hello world", "rust programming"])
3122+
.build(),
3123+
"keyterm=hello+world&keyterm=rust+programming",
3124+
);
3125+
}
29883126
}

0 commit comments

Comments
 (0)