Skip to content
This repository was archived by the owner on Dec 2, 2024. It is now read-only.

Commit 27fd0cf

Browse files
authored
Milestone 1: Updating Oppia-ML pipeline (#32)
* Milestone 1: Updating Oppia-ML pipeline * Disable lint (no-name-in-module) for importing generated proto files * Refactor protobuf implementation and address review comments * Fix lint tests * Addressed review comments * Fix __init__.py inclusion * Address review comments * Address Review Comments * Correct doc string * Nit changes
1 parent 425bd4a commit 27fd0cf

20 files changed

+387
-12
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
*.swp
44
*.swo
55
third_party/*
6+
core/domain/proto/*.py
7+
!core/domain/proto/__init__.py

core/classifiers/CodeClassifier/CodeClassifier.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,22 @@ def __init__(self):
538538
# text into a feature vector.
539539
self.count_vector = None
540540

541+
@property
542+
def name_in_job_result_proto(self):
543+
# This property must be defined because it is declared as an
544+
# abstract property in BaseClassifier. However, since code classifier
545+
# is currently disabled, the proto message definitions for code
546+
# classifier will not be added until it is re-implemented and enabled.
547+
return 'code_classifier'
548+
549+
@property
550+
def type_in_job_result_proto(self):
551+
# This property must be defined because it is declared as an
552+
# abstract property in BaseClassifier. However, since code classifier
553+
# is currently disabled, the proto message definitions for code
554+
# classifier will not be added until it is re-implemented and enabled.
555+
return '%sFrozenModel' % self.__class__.__name__
556+
541557
def to_dict(self):
542558
"""Returns a dict representing this classifier.
543559

core/classifiers/TextClassifier/TextClassifier.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class TextClassifier(base.BaseClassifier):
3535
Support Vector Classifier (SVC) to obtain the best model using the linear
3636
kernel.
3737
"""
38+
3839
def __init__(self):
3940
super(TextClassifier, self).__init__()
4041
# sklearn.svm.SVC classifier object.
@@ -54,6 +55,14 @@ def __init__(self):
5455
# Time taken to train the classifier
5556
self.exec_time = None
5657

58+
@property
59+
def name_in_job_result_proto(self):
60+
return 'text_classifier'
61+
62+
@property
63+
def type_in_job_result_proto(self):
64+
return '%sFrozenModel' % (self.__class__.__name__)
65+
5766
def train(self, training_data):
5867
"""Trains classifier using given training_data.
5968

core/classifiers/base.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,22 @@ class BaseClassifier(object):
3636
def __init__(self):
3737
pass
3838

39+
@property
40+
@abc.abstractproperty
41+
def name_in_job_result_proto(self):
42+
"""A property that identifies the attribute in job result proto message
43+
which will store this classifier's classifier data.
44+
"""
45+
raise NotImplementedError
46+
47+
@property
48+
@abc.abstractproperty
49+
def type_in_job_result_proto(self):
50+
"""The type of the property in job result proto message which stores
51+
this classifier's classifier data.
52+
"""
53+
raise NotImplementedError
54+
3955
@abc.abstractmethod
4056
def to_dict(self):
4157
"""Returns a dict representing this classifier.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from core.classifiers import algorithm_registry
2020
from core.classifiers import classifier_utils
21-
from core.services import remote_access_services
21+
from core.domain import remote_access_services
2222

2323
# pylint: disable=too-many-branches
2424
def _validate_job_data(job_data):

core/domain/proto/__init__.py

Whitespace-only changes.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// coding: utf-8
2+
//
3+
// Copyright 2020 The Oppia Authors. All Rights Reserved.
4+
//
5+
// Licensed under the Apache License, Version 2.0 (the "License");
6+
// you may not use this file except in compliance with the License.
7+
// You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS-IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
syntax = "proto3";
18+
19+
message TextClassifierFrozenModel {
20+
// The parameters of a trained text classifier model which are necessary
21+
// for inference.
22+
string model_json = 1;
23+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// coding: utf-8
2+
//
3+
// Copyright 2020 The Oppia Authors. All Rights Reserved.
4+
//
5+
// Licensed under the Apache License, Version 2.0 (the "License");
6+
// you may not use this file except in compliance with the License.
7+
// You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS-IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
syntax = "proto3";
18+
19+
import "core/domain/proto/text_classifier.proto";
20+
21+
// Training job response payload contains job result of the training job
22+
// along with other metadata items such as vm_id (to identify which VM executed
23+
// this job) and signature of the payload for security purposes.
24+
message TrainingJobResponsePayload {
25+
// Job result of the training job. Job result contains the ID of the Job and
26+
// trained model (frozen model) of the job.
27+
message JobResult {
28+
// Id of the training job whose data is being stored.
29+
string job_id = 1;
30+
31+
// Each of the classifier algorithms' proto message must be present in
32+
// the oneof classifier_frozen_model field.
33+
oneof classifier_frozen_model {
34+
TextClassifierFrozenModel text_classifier = 2;
35+
}
36+
}
37+
JobResult job_result = 1;
38+
39+
// Id of the VM instance that trained the job.
40+
string vm_id = 2;
41+
42+
// Signature of the job data for authenticated communication.
43+
string signature = 3;
44+
}

0 commit comments

Comments
 (0)