1717
1818package ai .picovoice .rhino ;
1919
20- import java .util .HashMap ;
20+ import java .util .LinkedHashMap ;
2121import java .util .Map ;
2222
2323/**
24- * Binding for Picovoice's speech-to-intent engine (aka Rhino).
25- * The object directly infers intent from speech commands within a given context of interest in
26- * real-time. It processes incoming audio in consecutive frames (chunks) and at the end of each
27- * frame indicates if the intent extraction is finalized. When finalized, the intent can be
28- * retrieved as structured data in form of an intent string and pairs of slots and values
29- * representing arguments (details) of intent. The number of samples per frame can be attained by
30- * calling {@link #frameLength()}. The incoming audio needs to have a sample rate equal to
31- * {@link #sampleRate()} and be 16-bit linearly-encoded. Furthermore, Rhino operates on single
32- * channel audio.
24+ * Binding for Picovoice's speech-to-intent engine (aka Rhino). The object directly infers intent
25+ * from speech commands within a given context of interest in real-time. It processes incoming audio
26+ * in consecutive frames (chunks) and at the end of each frame indicates if the intent extraction is
27+ * finalized. When finalized, the intent can be retrieved as structured data in form of an intent
28+ * string and pairs of slots and values representing arguments (details) of intent. The number of
29+ * samples per frame can be attained by calling {@link #frameLength()}. The incoming audio needs to
30+ * have a sample rate equal to {@link #sampleRate()} and be 16-bit linearly-encoded. Furthermore,
31+ * Rhino operates on single channel audio.
3332 */
3433public class Rhino {
3534 static {
@@ -40,10 +39,11 @@ public class Rhino {
4039
4140 /**
4241 * Constructor.
43- * @param modelFilePath Absolute path to file containing model parameters.
44- * @param contextFilePath Absolute path to file containing context parameters. A context
45- * represents the set of expressions (commands), intents, and intent
46- * arguments (slots) within a domain of interest.
42+ *
43+ * @param modelFilePath Absolute path to file containing model parameters.
44+ * @param contextFilePath Absolute path to file containing context parameters. A context
45+ * represents the set of expressions (commands), intents, and intent
46+ * arguments (slots) within a domain of interest.
4747 * @throws RhinoException On failure.
4848 */
4949 public Rhino (String modelFilePath , String contextFilePath ) throws RhinoException {
@@ -55,7 +55,8 @@ public Rhino(String modelFilePath, String contextFilePath) throws RhinoException
5555 }
5656
5757 /**
58- * Destructor. This is needs to be called explicitly as we do not rely on garbage collector.
58+ * Destructor. This needs to be called explicitly as we do not rely on the garbage collector.
59+ *
5960 * @throws RhinoException On failure.
6061 */
6162 public void delete () throws RhinoException {
@@ -69,7 +70,8 @@ public void delete() throws RhinoException {
6970 /**
7071 * Processes a frame of audio and emits a flag indicating if the engine has finalized intent
7172 * extraction. When finalized, {@link #isUnderstood()} should be called to check if the command
72- * was valid (is within context of interest).
73+ * was valid (is within context of interest) and is understood.
74+ *
7375 * @param pcm A frame of audio samples. The number of samples per frame can be attained by
7476 * calling {@link #frameLength()}. The incoming audio needs to have a sample rate
7577 * equal to {@link #sampleRate()} and be 16-bit linearly-encoded. Furthermore,
@@ -79,7 +81,7 @@ public void delete() throws RhinoException {
7981 */
8082 public boolean process (short [] pcm ) throws RhinoException {
8183 try {
82- return process (object , pcm ) == 1 ;
84+ return process (object , pcm );
8385 } catch (Exception e ) {
8486 throw new RhinoException (e );
8587 }
@@ -88,13 +90,14 @@ public boolean process(short[] pcm) throws RhinoException {
8890 /**
8991 * Indicates if the spoken command is valid, is within the domain of interest (context), and the
9092 * engine understood it.
93+ *
9194 * @return Flag indicating if the spoken command is valid, is within the domain of interest
9295 * (context), and the engine understood it.
9396 * @throws RhinoException On failure.
9497 */
9598 public boolean isUnderstood () throws RhinoException {
9699 try {
97- return isUnderstood (object ) == 1 ;
100+ return isUnderstood (object );
98101 } catch (Exception e ) {
99102 throw new RhinoException (e );
100103 }
@@ -105,21 +108,22 @@ public boolean isUnderstood() throws RhinoException {
105108 * string and pairs of slots and their values. It should be called only after intent extraction
106109 * is finalized and it is verified that the spoken command is valid and understood via calling
107110 * {@link #isUnderstood()}.
111+ *
108112 * @return Inferred intent object.
109113 * @throws RhinoException On failure.
110114 */
111115 public RhinoIntent getIntent () throws RhinoException {
112116 final String intentPacked = getIntent (object );
113117 String [] parts = intentPacked .split ("," );
114118 if (parts .length == 0 ) {
115- throw new RhinoException (String .format ("Failed to retrieve intent from %s" , intentPacked ));
119+ throw new RhinoException (String .format ("failed to retrieve intent from %s" , intentPacked ));
116120 }
117121
118- Map <String , String > slots = new HashMap <>();
122+ Map <String , String > slots = new LinkedHashMap <>();
119123 for (int i = 1 ; i < parts .length ; i ++) {
120124 String [] slotAndValue = parts [i ].split (":" );
121125 if (slotAndValue .length != 2 ) {
122- throw new RhinoException (String .format ("Failed to retrieve intent from %s" , intentPacked ));
126+ throw new RhinoException (String .format ("failed to retrieve intent from %s" , intentPacked ));
123127 }
124128 slots .put (slotAndValue [0 ], slotAndValue [1 ]);
125129 }
@@ -130,6 +134,7 @@ public RhinoIntent getIntent() throws RhinoException {
130134 /**
131135 * Resets the internal state of the engine. It should be called before the engine can be used to
132136 * infer intent from a new stream of audio.
137+ *
133138 * @throws RhinoException On failure.
134139 */
135140 public void reset () throws RhinoException {
@@ -143,6 +148,7 @@ public void reset() throws RhinoException {
143148 /**
144149 * Getter for expressions. Each expression maps a set of spoken phrases to an intent and
145150 * possibly a number of slots (intent arguments).
151+ *
146152 * @return Expressions.
147153 * @throws RhinoException On failure.
148154 */
@@ -154,35 +160,38 @@ public String getContextExpressions() throws RhinoException {
154160 }
155161 }
156162
157- private native long init (String model_file_path , String context_file_path );
158-
159- private native long delete (long object );
160-
161- private native int process (long object , short [] pcm );
162-
163- private native int isUnderstood (long object );
164-
165- private native String getIntent (long object );
166-
167- private native boolean reset (long object );
168-
169- private native String contextExpressions (long object );
170-
171163 /**
172164 * Getter for length (number of audio samples) per frame.
165+ *
173166 * @return Frame length.
174167 */
175168 public native int frameLength ();
176169
177170 /**
178171 * Audio sample rate accepted by Picovoice.
172+ *
179173 * @return Sample rate.
180174 */
181175 public native int sampleRate ();
182176
183177 /**
184178 * Getter for version string.
179+ *
185180 * @return Version string.
186181 */
187182 public native String version ();
183+
184+ private native long init (String model_file_path , String context_file_path );
185+
186+ private native void delete (long object );
187+
188+ private native boolean process (long object , short [] pcm );
189+
190+ private native boolean isUnderstood (long object );
191+
192+ private native String getIntent (long object );
193+
194+ private native boolean reset (long object );
195+
196+ private native String contextExpressions (long object );
188197}
0 commit comments