Skip to content

Commit fb793c3

Browse files
authored
AutoKeras 3.0 updates (#1951)
* use data.Dataset for all
* some tests passed
* more tests fixed
* more tests fixed.
* image classifier fixed.
* remove data
* text fixed
* removed keras_nlp
* fix one hot encoder.
* More tests fixed
* All tests fixed.
* Add structured data (#1950)
* patch in progress
* patch in progress 2
* progress 3
* tests runnable
* remove tf usages
* structured data working
* all tests passed
* docs works
* structured data added to docs
* clean ups
* Run tests with torch
* add grpc as a dependency
* add grpc and protobuf to test install
* fix the pip install grpcio
* addressing comments.
1 parent db78b44 commit fb793c3

File tree

95 files changed

+2457
-2436
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+2457
-2436
lines changed

.github/workflows/actions.yml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ jobs:
1010
build:
1111
name: Run tests
1212
runs-on: ubuntu-latest
13+
env:
14+
KERAS_BACKEND: torch
1315
steps:
1416
- uses: actions/checkout@v3
1517
- name: Set up Python 3.10
@@ -28,7 +30,8 @@ jobs:
2830
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
2931
- name: Install dependencies
3032
run: |
31-
pip install tensorflow
33+
pip install torch --index-url https://download.pytorch.org/whl/cpu
34+
pip install grpcio protobuf
3235
pip install -e ".[tests]" --progress-bar off
3336
- name: Test with pytest
3437
run: |
@@ -38,13 +41,17 @@ jobs:
3841
format:
3942
name: Check the code format
4043
runs-on: ubuntu-latest
44+
env:
45+
KERAS_BACKEND: torch
4146
steps:
4247
- uses: actions/checkout@v3
4348
- name: Run pre-commit
4449
run: bash shell/pre-commit.sh
4550
build-docs:
4651
name: Build the docs
4752
runs-on: ubuntu-latest
53+
env:
54+
KERAS_BACKEND: torch
4855
steps:
4956
- uses: actions/checkout@v3
5057
- name: Set up Python 3.10
@@ -54,7 +61,8 @@ jobs:
5461
- name: Install dependencies
5562
run: |
5663
python -m pip install --upgrade pip setuptools
57-
pip install tensorflow
64+
pip install torch --index-url https://download.pytorch.org/whl/cpu
65+
pip install grpcio protobuf
5866
pip install -e .
5967
pip install -r docs/requirements.txt
6068
- name: Build the docs
@@ -66,6 +74,8 @@ jobs:
6674
needs: [build, format, build-docs]
6775
if: github.event_name == 'release' && github.event.action == 'created'
6876
runs-on: ubuntu-latest
77+
env:
78+
KERAS_BACKEND: torch
6979
steps:
7080
- uses: actions/checkout@v3
7181
- name: Set up Python

autokeras/__init__.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import keras_nlp
16-
1715
from autokeras.auto_model import AutoModel
18-
from autokeras.blocks import BertBlock
1916
from autokeras.blocks import ClassificationHead
2017
from autokeras.blocks import ConvBlock
2118
from autokeras.blocks import DenseBlock
2219
from autokeras.blocks import EfficientNetBlock
20+
from autokeras.blocks import Embedding
2321
from autokeras.blocks import Flatten
2422
from autokeras.blocks import ImageAugmentation
2523
from autokeras.blocks import ImageBlock
@@ -29,6 +27,7 @@
2927
from autokeras.blocks import ResNetBlock
3028
from autokeras.blocks import RNNBlock
3129
from autokeras.blocks import SpatialReduction
30+
from autokeras.blocks import StructuredDataBlock
3231
from autokeras.blocks import TemporalReduction
3332
from autokeras.blocks import TextBlock
3433
from autokeras.blocks import XceptionBlock
@@ -39,23 +38,22 @@
3938
from autokeras.keras_layers import ExpandLastDim
4039
from autokeras.nodes import ImageInput
4140
from autokeras.nodes import Input
41+
from autokeras.nodes import StructuredDataInput
4242
from autokeras.nodes import TextInput
4343
from autokeras.tasks import ImageClassifier
4444
from autokeras.tasks import ImageRegressor
45+
from autokeras.tasks import StructuredDataClassifier
46+
from autokeras.tasks import StructuredDataRegressor
4547
from autokeras.tasks import TextClassifier
4648
from autokeras.tasks import TextRegressor
4749
from autokeras.tuners import BayesianOptimization
4850
from autokeras.tuners import Greedy
4951
from autokeras.tuners import Hyperband
5052
from autokeras.tuners import RandomSearch
51-
from autokeras.utils.io_utils import image_dataset_from_directory
52-
from autokeras.utils.io_utils import text_dataset_from_directory
5353

54-
__version__ = "2.1.0dev"
54+
__version__ = "3.0.0dev"
5555

5656
CUSTOM_OBJECTS = {
57-
"BertPreprocessor": keras_nlp.models.BertPreprocessor,
58-
"BertBackbone": keras_nlp.models.BertBackbone,
5957
"CastToFloat32": CastToFloat32,
6058
"ExpandLastDim": ExpandLastDim,
6159
}

autokeras/adapters/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from autokeras.adapters.input_adapters import ImageAdapter
1616
from autokeras.adapters.input_adapters import InputAdapter
17+
from autokeras.adapters.input_adapters import StructuredDataAdapter
1718
from autokeras.adapters.input_adapters import TextAdapter
1819
from autokeras.adapters.output_adapters import ClassificationAdapter
1920
from autokeras.adapters.output_adapters import RegressionAdapter

autokeras/adapters/input_adapters.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,17 @@
1313
# limitations under the License.
1414

1515
import numpy as np
16-
import tensorflow as tf
1716

1817
from autokeras.engine import adapter as adapter_module
1918

2019

2120
class InputAdapter(adapter_module.Adapter):
2221
def check(self, x):
2322
"""Record any information needed by transform."""
24-
if not isinstance(x, (np.ndarray, tf.data.Dataset)):
23+
if not isinstance(x, np.ndarray):
2524
raise TypeError(
26-
"Expect the data to Input to be numpy.ndarray or "
27-
"tf.data.Dataset, but got {type}.".format(type=type(x))
25+
"Expect the data to Input to be numpy.ndarray, "
26+
"but got {type}.".format(type=type(x))
2827
)
2928
if isinstance(x, np.ndarray) and not np.issubdtype(x.dtype, np.number):
3029
raise TypeError(
@@ -36,10 +35,10 @@ def check(self, x):
3635
class ImageAdapter(adapter_module.Adapter):
3736
def check(self, x):
3837
"""Record any information needed by transform."""
39-
if not isinstance(x, (np.ndarray, tf.data.Dataset)):
38+
if not isinstance(x, np.ndarray):
4039
raise TypeError(
41-
"Expect the data to ImageInput to be numpy.ndarray or "
42-
"tf.data.Dataset, but got {type}.".format(type=type(x))
40+
"Expect the data to ImageInput to be numpy.ndarray, "
41+
"but got {type}.".format(type=type(x))
4342
)
4443
if isinstance(x, np.ndarray) and not np.issubdtype(x.dtype, np.number):
4544
raise TypeError(
@@ -51,8 +50,17 @@ def check(self, x):
5150
class TextAdapter(adapter_module.Adapter):
5251
def check(self, x):
5352
"""Record any information needed by transform."""
54-
if not isinstance(x, (np.ndarray, tf.data.Dataset)):
53+
if not isinstance(x, np.ndarray):
5554
raise TypeError(
56-
"Expect the data to TextInput to be numpy.ndarray or "
57-
"tf.data.Dataset, but got {type}.".format(type=type(x))
55+
"Expect the data to TextInput to be numpy.ndarray, "
56+
"but got {type}.".format(type=type(x))
57+
)
58+
59+
60+
class StructuredDataAdapter(adapter_module.Adapter):
61+
def check(self, x):
62+
if not isinstance(x, np.ndarray):
63+
raise TypeError(
64+
"Unsupported type {type} for "
65+
"{name}.".format(type=type(x), name=self.__class__.__name__)
5866
)

autokeras/adapters/input_adapters_test.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515

1616
import numpy as np
17+
import pandas as pd
1718
import pytest
18-
import tensorflow as tf
1919

2020
from autokeras import test_utils
2121
from autokeras.adapters import input_adapters
@@ -25,73 +25,70 @@
2525
def test_image_input_adapter_transform_to_dataset():
2626
x = test_utils.generate_data()
2727
adapter = input_adapters.ImageAdapter()
28-
assert isinstance(adapter.adapt(x, batch_size=32), tf.data.Dataset)
28+
assert isinstance(adapter.adapt(x), np.ndarray)
2929

3030

3131
def test_image_input_unsupported_type():
3232
x = "unknown"
3333
adapter = input_adapters.ImageAdapter()
3434
with pytest.raises(TypeError) as info:
35-
x = adapter.adapt(x, batch_size=32)
35+
x = adapter.adapt(x)
3636
assert "Expect the data to ImageInput to be numpy" in str(info.value)
3737

3838

3939
def test_image_input_numerical():
4040
x = np.array([[["unknown"]]])
4141
adapter = input_adapters.ImageAdapter()
4242
with pytest.raises(TypeError) as info:
43-
x = adapter.adapt(x, batch_size=32)
43+
x = adapter.adapt(x)
4444
assert "Expect the data to ImageInput to be numerical" in str(info.value)
4545

4646

4747
def test_input_type_error():
4848
x = "unknown"
4949
adapter = input_adapters.InputAdapter()
5050
with pytest.raises(TypeError) as info:
51-
x = adapter.adapt(x, batch_size=32)
51+
x = adapter.adapt(x)
5252
assert "Expect the data to Input to be numpy" in str(info.value)
5353

5454

5555
def test_input_numerical():
5656
x = np.array([[["unknown"]]])
5757
adapter = input_adapters.InputAdapter()
5858
with pytest.raises(TypeError) as info:
59-
x = adapter.adapt(x, batch_size=32)
59+
x = adapter.adapt(x)
6060
assert "Expect the data to Input to be numerical" in str(info.value)
6161

6262

63-
def test_text_adapt_unbatched_dataset():
64-
x = tf.data.Dataset.from_tensor_slices(np.array(["a b c", "b b c"]))
65-
adapter = input_adapters.TextAdapter()
66-
x = adapter.adapt(x, batch_size=32)
67-
68-
assert data_utils.dataset_shape(x).as_list() == [None]
69-
assert isinstance(x, tf.data.Dataset)
70-
71-
72-
def test_text_adapt_batched_dataset():
73-
x = tf.data.Dataset.from_tensor_slices(np.array(["a b c", "b b c"])).batch(
74-
32
75-
)
76-
adapter = input_adapters.TextAdapter()
77-
x = adapter.adapt(x, batch_size=32)
78-
79-
assert data_utils.dataset_shape(x).as_list() == [None]
80-
assert isinstance(x, tf.data.Dataset)
81-
82-
8363
def test_text_adapt_np():
8464
x = np.array(["a b c", "b b c"])
8565
adapter = input_adapters.TextAdapter()
86-
x = adapter.adapt(x, batch_size=32)
66+
x = adapter.adapt(x)
8767

88-
assert data_utils.dataset_shape(x).as_list() == [None]
89-
assert isinstance(x, tf.data.Dataset)
68+
assert data_utils.dataset_shape(x) == [2]
69+
assert isinstance(x, np.ndarray)
9070

9171

9272
def test_text_input_type_error():
9373
x = "unknown"
9474
adapter = input_adapters.TextAdapter()
9575
with pytest.raises(TypeError) as info:
96-
x = adapter.adapt(x, batch_size=32)
76+
x = adapter.adapt(x)
9777
assert "Expect the data to TextInput to be numpy" in str(info.value)
78+
79+
80+
def test_structured_data_input_unsupported_type_error():
81+
with pytest.raises(TypeError) as info:
82+
adapter = input_adapters.StructuredDataAdapter()
83+
adapter.adapt("unknown")
84+
85+
assert "Unsupported type" in str(info.value)
86+
87+
88+
def test_structured_data_input_transform_to_dataset():
89+
x = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)
90+
adapter = input_adapters.StructuredDataAdapter()
91+
92+
x = adapter.adapt(x)
93+
94+
assert isinstance(x, np.ndarray)

autokeras/adapters/output_adapters.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414

1515
import numpy as np
16-
import tensorflow as tf
1716

1817
from autokeras.engine import adapter as adapter_module
1918

@@ -24,10 +23,9 @@ def __init__(self, name, **kwargs):
2423
self.name = name
2524

2625
def check(self, dataset):
27-
supported_types = (tf.data.Dataset, np.ndarray)
28-
if not isinstance(dataset, supported_types):
26+
if not isinstance(dataset, np.ndarray):
2927
raise TypeError(
30-
f"Expect the target data of {self.name} to be tf.data.Dataset,"
28+
f"Expect the target data of {self.name} to be"
3129
f" np.ndarray, but got {type(dataset)}."
3230
)
3331

autokeras/adapters/output_adapters_test.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import numpy as np
1616
import pytest
17-
import tensorflow as tf
1817

1918
from autokeras.adapters import output_adapters
2019

@@ -23,14 +22,14 @@ def test_unsupported_types_error():
2322
adapter = output_adapters.ClassificationAdapter(name="a")
2423

2524
with pytest.raises(TypeError) as info:
26-
adapter.adapt(1, batch_size=32)
25+
adapter.adapt(1)
2726

28-
assert "Expect the target data of a to be tf" in str(info.value)
27+
assert "Expect the target data of a to be" in str(info.value)
2928

3029

3130
def test_reg_head_transform_1d_np():
3231
adapter = output_adapters.RegressionAdapter(name="a")
3332

34-
y = adapter.adapt(np.random.rand(10), batch_size=32)
33+
y = adapter.adapt(np.random.rand(10))
3534

36-
assert isinstance(y, tf.data.Dataset)
35+
assert isinstance(y, np.ndarray)

autokeras/analysers/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from autokeras.analysers.input_analysers import CATEGORICAL
16+
from autokeras.analysers.input_analysers import NUMERICAL
1517
from autokeras.analysers.input_analysers import ImageAnalyser
1618
from autokeras.analysers.input_analysers import InputAnalyser
19+
from autokeras.analysers.input_analysers import StructuredDataAnalyser
1720
from autokeras.analysers.input_analysers import TextAnalyser
1821
from autokeras.analysers.output_analysers import ClassificationAnalyser
1922
from autokeras.analysers.output_analysers import RegressionAnalyser

0 commit comments

Comments
 (0)