
Commit 17ce1f3

Fixing various errors in the structured_data files (#2013)
* fixing errors in movielens_recommendations_transformers.py
* fixing errors in wide_deep_cross_networks.py
* fixing data download errors in feature_space_advanced.py
* generating .ipynb and .md files for movielens_recommendations_transformers.py
* generating .ipynb and .md files for wide_deep_cross_networks.py
* generating .ipynb and .md files for feature_space_advanced.py
1 parent 22554c6 commit 17ce1f3

12 files changed: +45902 −559 lines
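The regenerated .ipynb and .md files are presumably produced from the edited .py sources rather than by hand; if keras-io's usual contribution workflow applies, the command (an assumption based on the project's contributing guide, not stated in this commit) would be `cd scripts && python autogen.py add_example structured_data/feature_space_advanced`, repeated for each touched example.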

examples/structured_data/feature_space_advanced.py

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@
 Title: FeatureSpace advanced use cases
 Author: [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)
 Date created: 2023/07/01
-Last modified: 2023/07/01
+Last modified: 2025/01/03
 Description: How to use FeatureSpace for advanced preprocessing use cases.
 Accelerator: None
 """
@@ -90,7 +90,7 @@
 
 data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
 data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
-keras_datasets_path = Path(data_zipped_path).parents[0]
+keras_datasets_path = Path(data_zipped_path)
 with ZipFile(f"{keras_datasets_path}/bank-additional.zip", "r") as zip:
     # Extract files
     zip.extractall(path=keras_datasets_path)
@@ -538,7 +538,7 @@ def example_feature_space(dataset, feature_space, feature_names):
 """
 
 model.fit(
-    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2
+    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2
 )
 
 """
[3 image files changed: 429 KB, 1010 KB, 468 KB — binary diffs not shown]
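The download fix tracks a behavior change in `keras.utils.get_file`: in recent Keras 3 releases, calling it with `extract=True` returns the path of the extracted directory rather than the downloaded archive, so the old `.parents[0]` hop would now land one level too high. A minimal sketch of the fixed flow, assuming the UCI archive layout the tutorial downloads (an outer zip containing `bank-additional.zip`):

```python
from pathlib import Path
from zipfile import ZipFile

import keras

data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"

# With extract=True, recent Keras 3 returns the extracted directory itself
# (older Keras returned the archive path, hence the previous `.parents[0]`).
data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
keras_datasets_path = Path(data_zipped_path)

# The UCI archive nests a second zip that still has to be opened manually.
with ZipFile(keras_datasets_path / "bank-additional.zip", "r") as zf:
    zf.extractall(path=keras_datasets_path)
```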

examples/structured_data/ipynb/feature_space_advanced.ipynb

Lines changed: 31 additions & 30 deletions
@@ -10,7 +10,7 @@
 "\n",
 "**Author:** [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)<br>\n",
 "**Date created:** 2023/07/01<br>\n",
-"**Last modified:** 2023/07/01<br>\n",
+"**Last modified:** 2025/01/03<br>\n",
 "**Description:** How to use FeatureSpace for advanced preprocessing use cases."
 ]
 },
@@ -94,7 +94,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -125,15 +125,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
 "outputs": [],
 "source": [
 "data_url = \"https://archive.ics.uci.edu/static/public/222/bank+marketing.zip\"\n",
 "data_zipped_path = keras.utils.get_file(\"bank_marketing.zip\", data_url, extract=True)\n",
-"keras_datasets_path = Path(data_zipped_path).parents[0]\n",
+"keras_datasets_path = Path(data_zipped_path)\n",
 "with ZipFile(f\"{keras_datasets_path}/bank-additional.zip\", \"r\") as zip:\n",
 "    # Extract files\n",
 "    zip.extractall(path=keras_datasets_path)\n",
@@ -157,7 +157,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -183,7 +183,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -215,7 +215,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -248,7 +248,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -291,7 +291,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -335,7 +335,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -352,7 +352,8 @@
 "        print(f\"Input: {[{k:v.numpy()} for k, v in inputs.items()]}\")\n",
 "        print(\n",
 "            f\"Preprocessed output: {[{k:v.numpy()} for k, v in preprocessed_x.items()]}\"\n",
-"        )\n"
+"        )\n",
+""
 ]
 },
 {
@@ -380,7 +381,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -406,7 +407,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -436,7 +437,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -476,7 +477,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -514,7 +515,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -563,7 +564,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -612,7 +613,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -650,7 +651,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -722,7 +723,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -748,7 +749,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -778,7 +779,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -806,7 +807,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -836,7 +837,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -858,7 +859,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -886,14 +887,14 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
 "outputs": [],
 "source": [
 "model.fit(\n",
-"    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2\n",
+"    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2\n",
 ")"
 ]
 },
@@ -924,7 +925,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -947,7 +948,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 0,
 "metadata": {
 "colab_type": "code"
 },
@@ -1026,4 +1027,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
+}

examples/structured_data/ipynb/movielens_recommendations_transformers.ipynb

Lines changed: 5 additions & 5 deletions
@@ -10,7 +10,7 @@
 "\n",
 "**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
 "**Date created:** 2020/12/30<br>\n",
-"**Last modified:** 2020/12/30<br>\n",
+"**Last modified:** 2025/01/03<br>\n",
 "**Description:** Rating rate prediction using the Behavior Sequence Transformer (BST) model on the Movielens."
 ]
 },
@@ -429,7 +429,7 @@
 "outputs": [],
 "source": [
 "\n",
-"def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n",
+"def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):\n",
 "    def process(features):\n",
 "        movie_ids_string = features[\"sequence_movie_ids\"]\n",
 "        sequence_movie_ids = tf.strings.split(movie_ids_string, \",\").to_tensor()\n",
@@ -447,7 +447,7 @@
 "        target = sequence_ratings[:, -1]\n",
 "        features[\"sequence_ratings\"] = sequence_ratings[:, :-1]\n",
 "\n",
-"        return features, target\n",
+"        return dict(features), target\n",
 "\n",
 "    dataset = tf.data.experimental.make_csv_dataset(\n",
 "        csv_file_path,\n",
@@ -759,10 +759,10 @@
 ")\n",
 "\n",
 "# Read the training data.\n",
-"train_dataset = get_dataset_from_csv(\"train_data.csv\", shuffle=True, batch_size=265)\n",
+"train_dataset = get_dataset_from_csv(\"train_data.csv\", batch_size=265, shuffle=True)\n",
 "\n",
 "# Fit the model with the training data.\n",
-"model.fit(train_dataset, epochs=5)\n",
+"model.fit(train_dataset, epochs=2)\n",
 "\n",
 "# Read the test data.\n",
 "test_dataset = get_dataset_from_csv(\"test_data.csv\", batch_size=265)\n",

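The `dict(features)` change is the substantive fix here: `tf.data.experimental.make_csv_dataset` yields features as an `OrderedDict`, and Keras 3 validates input structures strictly, so passing an `OrderedDict` to a model built from a plain dict of `Input`s can raise a structure-mismatch error. A minimal sketch of the pattern, reduced to the ratings column (the `field_delim="|"` and column handling are assumptions based on how the tutorial writes its CSV files):

```python
import tensorflow as tf


def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):
    def process(features):
        # Split the "3.5,4.0,..." rating string into a dense float tensor.
        sequence_ratings = tf.strings.to_number(
            tf.strings.split(features["sequence_ratings"], ","), tf.dtypes.float32
        ).to_tensor()
        # The last rating in each sequence is the prediction target.
        target = sequence_ratings[:, -1]
        features["sequence_ratings"] = sequence_ratings[:, :-1]
        # make_csv_dataset yields an OrderedDict; hand Keras a plain dict.
        return dict(features), target

    return tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        num_epochs=1,
        header=True,
        field_delim="|",  # assumed: the tutorial saves its CSVs with sep="|"
        shuffle=shuffle,
    ).map(process)
```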
examples/structured_data/ipynb/wide_deep_cross_networks.ipynb

Lines changed: 8 additions & 3 deletions
@@ -10,7 +10,7 @@
 "\n",
 "**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
 "**Date created:** 2020/12/31<br>\n",
-"**Last modified:** 2021/05/05<br>\n",
+"**Last modified:** 2025/01/03<br>\n",
 "**Description:** Using Wide & Deep and Deep & Cross networks for structured data classification."
 ]
 },
@@ -296,6 +296,11 @@
 },
 "outputs": [],
 "source": [
+"\n",
+"# To convert the datasets elements to from OrderedDict to Dictionary\n",
+"def process(features, target):\n",
+"    return dict(features), target\n",
+"\n",
 "\n",
 "def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):\n",
 "    dataset = tf_data.experimental.make_csv_dataset(\n",
@@ -307,7 +312,7 @@
 "        num_epochs=1,\n",
 "        header=True,\n",
 "        shuffle=shuffle,\n",
-"    )\n",
+"    ).map(process)\n",
 "    return dataset.cache()\n",
 ""
 ]
@@ -333,7 +338,7 @@
 "learning_rate = 0.001\n",
 "dropout_rate = 0.1\n",
 "batch_size = 265\n",
-"num_epochs = 50\n",
+"num_epochs = 1\n",
 "\n",
 "hidden_units = [32, 32]\n",
 "\n",

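The Wide & Deep fix applies the same OrderedDict-to-dict conversion, but as a `map` over the finished dataset so the rest of the pipeline stays untouched; dropping `num_epochs` to 1 just keeps the regenerated notebook fast to execute. A sketch of the fixed loader, assuming the tutorial's Covertype CSV layout (the full example also passes `column_names` and `column_defaults`, omitted here; `"Cover_Type"` as the target column is likewise an assumption from the tutorial):

```python
from tensorflow import data as tf_data


# Convert each element's features from an OrderedDict to a plain dict so they
# match the dict of named Inputs the Keras 3 model was built from.
def process(features, target):
    return dict(features), target


def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):
    dataset = tf_data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        label_name="Cover_Type",  # assumed: the tutorial's target column
        num_epochs=1,
        header=True,
        shuffle=shuffle,
    ).map(process)
    return dataset.cache()
```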