@@ -29,6 +29,8 @@ limitations under the License.
29
29
#include " tensorflow_serving/apis/inference.pb.h"
30
30
#include " tensorflow_serving/apis/predict.pb.h"
31
31
#include " tensorflow_serving/apis/regression.pb.h"
32
+ #include " tensorflow_serving/servables/tensorflow/predict_response_tensor_serialization_option.h"
33
+ #include " tensorflow_serving/servables/tensorflow/thread_pool_factory.h"
32
34
33
35
namespace tensorflow {
34
36
namespace serving {
@@ -48,13 +50,27 @@ class Servable {
48
50
// Returns the version associated with this servable.
49
51
int64_t version () const { return version_; }
50
52
51
- virtual absl::Status Classify (const ClassificationRequest& request,
53
+ // RunOptions groups the configuration for an individual inference execution.
54
+ // Per-request settings (e.g. the deadline) are passed through this struct.
55
+ struct RunOptions {
56
+ // Priority of the request. Some thread pool implementations schedule
57
+ // ops based on the priority number; a larger number means a higher
58
+ // priority. Defaults to 1.
59
+ int64_t priority = 1 ;
60
+ // The deadline for this request. Defaults to absl::InfiniteFuture().
61
+ absl::Time deadline = absl::InfiniteFuture();
62
+ };
63
+
64
// Pure virtual classification entry point; subclasses must override it.
// `run_options` carries the per-request RunOptions (priority, deadline).
// NOTE(review): presumably the result is written to `response` -- confirm
// against the concrete implementations.
+ virtual absl::Status Classify (const RunOptions& run_options,
65
+ const ClassificationRequest& request,
52
66
ClassificationResponse* response) = 0;
53
67
54
// Pure virtual regression entry point; subclasses must override it.
// `run_options` carries the per-request RunOptions (priority, deadline).
// NOTE(review): presumably the result is written to `response` -- confirm
// against the concrete implementations.
- virtual absl::Status Regress (const RegressionRequest& request,
68
+ virtual absl::Status Regress (const RunOptions& run_options,
69
+ const RegressionRequest& request,
55
70
RegressionResponse* response) = 0;
56
71
57
// Pure virtual prediction entry point; subclasses must override it.
// `run_options` carries the per-request RunOptions (priority, deadline).
// NOTE(review): presumably the result is written to `response` -- confirm
// against the concrete implementations.
- virtual absl::Status Predict (const PredictRequest& request,
72
+ virtual absl::Status Predict (const RunOptions& run_options,
73
+ const PredictRequest& request,
58
74
PredictResponse* response) = 0;
59
75
60
76
// Streamed version of `Predict`. Experimental API that is not yet part of the
@@ -67,10 +83,11 @@ class Servable {
67
83
// callback invocation to be delayed. The implementation guarantees that the
68
84
// callback is never called after the `PredictStreamed` method returns.
69
85
// Pure virtual; subclasses must override it. `run_options` carries the
// per-request RunOptions (priority, deadline); `response_callback` is an
// absl::AnyInvocable taking a PredictResponse by value.
virtual absl::Status PredictStreamed (
70
- const PredictRequest& request,
86
+ const RunOptions& run_options, const PredictRequest& request,
71
87
absl::AnyInvocable<void (PredictResponse)> response_callback) = 0;
72
88
73
// Pure virtual multi-inference entry point; subclasses must override it.
// `run_options` carries the per-request RunOptions (priority, deadline).
// NOTE(review): presumably the result is written to `response` -- confirm
// against the concrete implementations.
- virtual absl::Status MultiInference (const MultiInferenceRequest& request,
89
+ virtual absl::Status MultiInference (const RunOptions& run_options,
90
+ const MultiInferenceRequest& request,
74
91
MultiInferenceResponse* response) = 0;
75
92
76
93
virtual absl::Status GetModelMetadata (const GetModelMetadataRequest& request,
@@ -95,28 +112,32 @@ class EmptyServable : public Servable {
95
112
public:
96
113
EmptyServable ();
97
114
98
// Ignores `run_options`, `request` and `response`; always returns `error_`.
- absl::Status Classify (const ClassificationRequest& request,
115
+ absl::Status Classify (const RunOptions& run_options,
116
+ const ClassificationRequest& request,
99
117
ClassificationResponse* response) override {
100
118
return error_;
101
119
}
102
120
103
// Ignores `run_options`, `request` and `response`; always returns `error_`.
- absl::Status Regress (const RegressionRequest& request,
121
+ absl::Status Regress (const RunOptions& run_options,
122
+ const RegressionRequest& request,
104
123
RegressionResponse* response) override {
105
124
return error_;
106
125
}
107
126
108
// Ignores `run_options`, `request` and `response`; always returns `error_`.
- absl::Status Predict (const PredictRequest& request,
127
+ absl::Status Predict (const RunOptions& run_options,
128
+ const PredictRequest& request,
109
129
PredictResponse* response) override {
110
130
return error_;
111
131
}
112
132
113
133
// Ignores `run_options` and `request`, never invokes `response_callback`,
// and always returns `error_`.
absl::Status PredictStreamed (
114
- const PredictRequest& request,
134
+ const RunOptions& run_options, const PredictRequest& request,
115
135
absl::AnyInvocable<void (PredictResponse)> response_callback) override {
116
136
return error_;
117
137
}
118
138
119
// Ignores `run_options`, `request` and `response`; always returns `error_`.
- absl::Status MultiInference (const MultiInferenceRequest& request,
139
+ absl::Status MultiInference (const RunOptions& run_options,
140
+ const MultiInferenceRequest& request,
120
141
MultiInferenceResponse* response) override {
121
142
return error_;
122
143
}
0 commit comments